From 92d3f94ee1400e7d924f2cd628f3afc93c465c00 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Fri, 17 May 2024 10:24:20 +0200 Subject: [PATCH 1/5] Added changes so fesom2 can compile with OpenACC. Have to check the namelist files --- CMakeLists.txt | 2 +- env.sh | 9 +++++++-- env/levante.dkrz.de/shell.nvhpc | 7 ++++--- src/CMakeLists.txt | 13 +++++++------ src/ice_fct.F90 | 4 ++++ 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 42be9544a..92d473451 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16) # set default build type cache entry (do so before project(...) is called, which would create this cache entry on its own) if(NOT CMAKE_BUILD_TYPE) message(STATUS "setting default build type: Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") endif() project(FESOM2.0) diff --git a/env.sh b/env.sh index 6fff86847..4b3aace25 100755 --- a/env.sh +++ b/env.sh @@ -54,7 +54,12 @@ elif [[ $LOGINHOST =~ ^m[A-Za-z0-9]+\.hpc\.dkrz\.de$ ]]; then STRATEGY="mistral.dkrz.de" elif [[ $LOGINHOST =~ ^levante ]] || [[ $LOGINHOST =~ ^l[:alnum:]+\.lvt\.dkrz\.de$ ]]; then STRATEGY="levante.dkrz.de" -elif [[ $LOGINHOST =~ ^ollie[0-9]$ ]] || [[ $LOGINHOST =~ ^prod-[0-9]{4}$ ]]; then + # following regex only matches if input is 2 word like levante.nvhpc, this enables using different shells for a machine directly + compid_regex="^([[:alnum:]]+)\.([[:alnum:]]+)$" + if [[ $LOGINHOST =~ $compid_regex ]]; then + COMPILERID="${BASH_REMATCH[2]}" + fi + elif [[ $LOGINHOST =~ ^ollie[0-9]$ ]] || [[ $LOGINHOST =~ ^prod-[0-9]{4}$ ]]; then STRATEGY="ollie" elif [[ $LOGINHOST =~ ^albedo[0-9]$ ]] || [[ 
$LOGINHOST =~ ^prod-[0-9]{4}$ ]]; then STRATEGY="albedo" @@ -112,4 +117,4 @@ else echo "Sourcing $(realpath $SHELLFILE) for environment" source $SHELLFILE echo "$(realpath ${SHELLFILE})" > $DIR/bin/current_shell_path -fi +fi \ No newline at end of file diff --git a/env/levante.dkrz.de/shell.nvhpc b/env/levante.dkrz.de/shell.nvhpc index eb2b776f6..5bf73e871 100755 --- a/env/levante.dkrz.de/shell.nvhpc +++ b/env/levante.dkrz.de/shell.nvhpc @@ -5,9 +5,10 @@ export CPU_MODEL=AMD_EPYC_ZEN3 module --force purge # module load intel-oneapi-compilers/2022.0.1-gcc-11.2.0 # module load openmpi/4.1.2-intel-2021.5.0 -module load nvhpc/22.5-gcc-11.2.0 -module load openmpi/.4.1.4-nvhpc-22.5 +module load nvhpc/23.9-gcc-11.2.0 +module load openmpi/4.1.6-nvhpc-23.9 export FC=mpif90 CC=mpicc CXX=mpicxx; +# export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 @@ -28,4 +29,4 @@ export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self export UCX_UNIFIED_MODE=y export HDF5_USE_FILE_LOCKING=FALSE export OMPI_MCA_io="romio321" -export UCX_HANDLE_ERRORS=bt +export UCX_HANDLE_ERRORS=bt \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 509e899a4..c7159844f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -269,16 +269,17 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) endif() elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_NVHPC_WORKAROUNDS) - target_compile_options(${PROJECT_NAME} PRIVATE -fast -fastsse -O3 -Mallocatable=95 -Mr8 -pgf90libs) + target_compile_options(${PROJECT_NAME} PRIVATE -Mnofma -Mallocatable=95 -Mr8 -pgf90libs) if(ENABLE_OPENACC) # additional compiler settings - target_compile_options(${PROJECT_NAME} PRIVATE -acc -ta=tesla:${NV_GPU_ARCH} -Minfo=accel) - set(CMAKE_EXE_LINKER_FLAGS "-acc 
-ta=tesla:${NV_GPU_ARCH}") + message("Taking ENABLE_OPENACC = ON") + target_compile_options(${PROJECT_NAME} PRIVATE -O2 -acc -gpu=${NV_GPU_ARCH} -Minfo=accel) + set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${NV_GPU_ARCH}") endif() if(ENABLE_OPENMP) target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast) - else() - target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) + # else() + # target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) endif() endif() @@ -316,4 +317,4 @@ target_link_libraries(${PROJECT_NAME}.x PUBLIC ${PROJECT_NAME}) ### Export and installation -fesom_export(TARGETS ${PROJECT_NAME} parms fesom.x) +fesom_export(TARGETS ${PROJECT_NAME} parms fesom.x) \ No newline at end of file diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index d60bf352d..df7fe3948 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -1122,7 +1122,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) call exchange_nod(ice_temp, partit, luse_g2g = .true.) #endif +#ifndef ENABLE_OPENACC +!$OMP PARALLEL DO +#else !$ACC END DATA +#endif !$OMP BARRIER end subroutine ice_fem_fct From 59b0765c657ad9d05af113f9f288990664a6c458 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Fri, 17 May 2024 14:46:36 +0200 Subject: [PATCH 2/5] Working OpenACC code after commenting 'set(CMAKE_EXE_LINKER_FLAGS)' --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 92d473451..6fb86280a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16) # set default build type cache entry (do so before project(...) 
is called, which would create this cache entry on its own) if(NOT CMAKE_BUILD_TYPE) message(STATUS "setting default build type: Release") - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") endif() project(FESOM2.0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c7159844f..cc34a529f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,8 +58,9 @@ endif() option(ENABLE_OPENACC "compile with OpenACC support" OFF) message(STATUS "ENABLE_OPENACC: ${ENABLE_OPENACC}") - -set(NV_GPU_ARCH "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") +option(DISABLE_OPENACC_ATOMICS "disable kernels using atomic statement for reproducible results" ON) +set(GPU_COMPUTE_CAPABILITY "cc80" CACHE STRING "GPU arch for nvfortran compiler (cc35,cc50,cc60,cc70,cc80,...)") +set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "Comma-separated sub-options passed to nvfortran -gpu= (CUDA toolkit version, compute capability)") option(ENABLE_OPENMP "build FESOM with OpenMP" OFF) message(STATUS "ENABLE_OPENMP: ${ENABLE_OPENMP}") @@ -256,7 +257,7 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU ) elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) #target_compile_options(${PROJECT_NAME} PRIVATE -c -emf -hbyteswapio -hflex_mp=conservative -hfp1 -hadd_paren -Ounroll0 -hipa0 -r am -s real64 -N 1023 -g -G2 -O3) target_compile_options(${PROJECT_NAME} PRIVATE -c -emf -hbyteswapio -hflex_mp=conservative -hfp1 -hadd_paren -Ounroll0 -hipa0 -r am -s real64 -N 1023 -g -G2 -O2 -hnoacc -M878) #-hnoacc is a workaround for cray automatically activate -hacc, -M878 is to suppress ftn-878 warning - if(ENABLE_OPENMP) + if(${ENABLE_OPENMP}) target_compile_options(${PROJECT_NAME} PRIVATE -homp) 
else() target_compile_options(${PROJECT_NAME} PRIVATE -hnoomp) @@ -269,14 +270,19 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray ) endif() elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) target_compile_definitions(${PROJECT_NAME} PRIVATE ENABLE_NVHPC_WORKAROUNDS) + #target_compile_options(${PROJECT_NAME} PRIVATE -fast -fastsse -O3 -Mallocatable=95 -Mr8 -pgf90libs) target_compile_options(${PROJECT_NAME} PRIVATE -Mnofma -Mallocatable=95 -Mr8 -pgf90libs) - if(ENABLE_OPENACC) + if(${ENABLE_OPENACC}) # additional compiler settings message("Taking ENABLE_OPENACC = ON") - target_compile_options(${PROJECT_NAME} PRIVATE -O2 -acc -gpu=${NV_GPU_ARCH} -Minfo=accel) - set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${NV_GPU_ARCH}") + target_compile_options(${PROJECT_NAME} PRIVATE -acc -O2 -gpu=${GPU_FLAGS} -Minfo=accel) + # set(CMAKE_EXE_LINKER_FLAGS "-acc -gpu=${GPU_FLAGS}") + if(${DISABLE_OPENACC_ATOMICS}) + message("Taking DISABLE_OPENACC_ATOMICS = ON") + target_compile_definitions(${PROJECT_NAME} PRIVATE DISABLE_OPENACC_ATOMICS) + endif() endif() - if(ENABLE_OPENMP) + if(${ENABLE_OPENMP}) target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast) # else() # target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) From ff4b4252e412d49a5b280e837f7c6f02d0b7d375 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Tue, 21 May 2024 13:13:04 +0200 Subject: [PATCH 3/5] Updated the changes regarding unnecessary spaces and changed all ENABLE_OPENMP to ${ENABLE_OPENMP} in src/CMakeLists.txt --- CMakeLists.txt | 2 +- env.sh | 2 +- env/levante.dkrz.de/shell.nvhpc | 2 +- src/CMakeLists.txt | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fb86280a..42be9544a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.16) # set default build type cache entry (do so before project(...) 
is called, which would create this cache entry on its own) if(NOT CMAKE_BUILD_TYPE) message(STATUS "setting default build type: Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.") endif() project(FESOM2.0) diff --git a/env.sh b/env.sh index 4b3aace25..6c5d9b1f5 100755 --- a/env.sh +++ b/env.sh @@ -117,4 +117,4 @@ else echo "Sourcing $(realpath $SHELLFILE) for environment" source $SHELLFILE echo "$(realpath ${SHELLFILE})" > $DIR/bin/current_shell_path -fi \ No newline at end of file +fi diff --git a/env/levante.dkrz.de/shell.nvhpc b/env/levante.dkrz.de/shell.nvhpc index 5bf73e871..f0ae54531 100755 --- a/env/levante.dkrz.de/shell.nvhpc +++ b/env/levante.dkrz.de/shell.nvhpc @@ -29,4 +29,4 @@ export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self export UCX_UNIFIED_MODE=y export HDF5_USE_FILE_LOCKING=FALSE export OMPI_MCA_io="romio321" -export UCX_HANDLE_ERRORS=bt \ No newline at end of file +export UCX_HANDLE_ERRORS=bt diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cc34a529f..ae6283a09 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,7 +64,7 @@ set(GPU_FLAGS "cuda12.2,${GPU_COMPUTE_CAPABILITY}" CACHE STRING "GPU arch for nv option(ENABLE_OPENMP "build FESOM with OpenMP" OFF) message(STATUS "ENABLE_OPENMP: ${ENABLE_OPENMP}") -if(ENABLE_OPENMP) +if(${ENABLE_OPENMP}) find_package(OpenMP REQUIRED COMPONENTS Fortran) endif() @@ -185,7 +185,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE parms) #metis target_link_libraries(${PROJECT_NAME} PRIVATE MPI::MPI_Fortran) set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE Fortran) -if(ENABLE_OPENMP) +if(${ENABLE_OPENMP}) target_link_libraries(${PROJECT_NAME} PRIVATE OpenMP::OpenMP_Fortran) endif() @@ 
-284,8 +284,8 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL NVHPC ) endif() if(${ENABLE_OPENMP}) target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast) - # else() - # target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) + else() + target_compile_options(${PROJECT_NAME} PRIVATE -Mipa=fast,inline) endif() endif() @@ -323,4 +323,4 @@ target_link_libraries(${PROJECT_NAME}.x PUBLIC ${PROJECT_NAME}) ### Export and installation -fesom_export(TARGETS ${PROJECT_NAME} parms fesom.x) \ No newline at end of file +fesom_export(TARGETS ${PROJECT_NAME} parms fesom.x) From d9944917b6b53244a38789078a75be70e09b7046 Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Thu, 6 Jun 2024 15:17:47 +0200 Subject: [PATCH 4/5] Added phase 1 of changes to init_tracer_AB on top of production_DE_openacc_test --- src/fesom_module.F90 | 6 +++ src/oce_ale_tracer.F90 | 12 ++++- src/oce_tracer_mod.F90 | 99 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 103 insertions(+), 14 deletions(-) diff --git a/src/fesom_module.F90 b/src/fesom_module.F90 index 843152e4e..4ca4ac633 100755 --- a/src/fesom_module.F90 +++ b/src/fesom_module.F90 @@ -382,6 +382,9 @@ subroutine fesom_init(fesom_total_nsteps) !$ACC CREATE (f%tracers%work%adv_flux_hor, f%tracers%work%adv_flux_ver, f%tracers%work%fct_LO) & !$ACC CREATE (f%tracers%work%del_ttf_advvert, f%tracers%work%del_ttf_advhoriz, f%tracers%work%edge_up_dn_grad) & !$ACC CREATE (f%tracers%work%del_ttf) + + !! Creating variables in GPU memory for init_tracers_AB module + !$ACC ENTER DATA CREATE(tr_xy, tr_z, relax2clim, Sclim, Tclim) end subroutine @@ -632,6 +635,9 @@ subroutine fesom_finalize() !$ACC EXIT DATA DELETE (f%dynamics%w, f%dynamics%w_e, f%dynamics%uv) !$ACC EXIT DATA DELETE (f%dynamics, f%tracers) + !!$ Deleting init_tracers_AB values + !$ACC EXIT DATA DELETE (tr_xy, tr_z, relax2clim, Sclim, Tclim) + !delete mesh and partit data. 
!$ACC EXIT DATA DELETE (f%mesh%coriolis_node, f%mesh%nn_num, f%mesh%nn_pos) !$ACC EXIT DATA DELETE (f%mesh%ssh_stiff, f%mesh%ssh_stiff%rowptr) diff --git a/src/oce_ale_tracer.F90 b/src/oce_ale_tracer.F90 index 1a0deaf5e..187339ede 100644 --- a/src/oce_ale_tracer.F90 +++ b/src/oce_ale_tracer.F90 @@ -148,6 +148,9 @@ subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh) subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh) use g_config use o_PARAM, only: SPP, Fer_GM + !tr_xy and tr_z are needed cause, we are writing them on the GPU in init_tracers_AB subroutine + !and updating them so HOST can have access to them + use o_arrays, only: tr_xy, tr_z use mod_mesh USE MOD_PARTIT USE MOD_PARSUP @@ -219,15 +222,20 @@ subroutine solve_tracers_ale(ice, dynamics, tracers, partit, mesh) ! do tracer AB (Adams-Bashfort) interpolation only for advectiv part ! needed if (flag_debug .and. mype==0) print *, achar(27)//'[37m'//' --> call init_tracers_AB'//achar(27)//'[0m' + !$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) call init_tracers_AB(tr_num, tracers, partit, mesh) + !$ACC UPDATE HOST(tr_xy, tr_z) + ! advect tracers if (flag_debug .and. mype==0) print *, achar(27)//'[37m'//' --> call adv_tracers_ale'//achar(27)//'[0m' !here update only those initialized in the init_tracers. (values, valuesAB, edge_up_dn_grad, ...) - !$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) & - !$ACC DEVICE(tracers%work%edge_up_dn_grad) !!& + !!!! UPDATE from hpc_tracer !!!! + !we dont have to update because we are updating before init_tracers_AB + !!$ACC UPDATE DEVICE(tracers%data(tr_num)%values, tracers%data(tr_num)%valuesAB) & + !$ACC UPDATE DEVICE(tracers%work%edge_up_dn_grad) !!& ! 
it will update del_ttf with contributions from horizontal and vertical advection parts (del_ttf_advhoriz and del_ttf_advvert) !$ACC wait(1) call do_oce_adv_tra(dt, UV, Wvel, Wvel_i, Wvel_e, tr_num, dynamics, tracers, partit, mesh) diff --git a/src/oce_tracer_mod.F90 b/src/oce_tracer_mod.F90 index 944be884b..46ee90440 100755 --- a/src/oce_tracer_mod.F90 +++ b/src/oce_tracer_mod.F90 @@ -25,7 +25,10 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) type(t_tracer), intent(inout), target :: tracers integer :: n,nz +#ifndef ENABLE_OPENACC +#else !$ACC parallel loop collapse(2) default(present) !!!async(1) +#endif do n=1, partit%myDim_nod2D+partit%eDim_nod2D do nz=1, mesh%nl-1 ! del_ttf will contain all advection / diffusion contributions for this tracer. Set it to 0 at the beginning! @@ -34,44 +37,80 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) tracers%work%del_ttf_advvert (nz, n) = 0.0_WP end do end do +#ifndef ENABLE_OPENACC +#else !$ACC end parallel loop +#endif +#ifndef ENABLE_OPENACC !$OMP PARALLEL DO +#else +!$ACC parallel loop collapse(2) +#endif do n=1, partit%myDim_nod2D+partit%eDim_nod2D ! 
AB interpolation - if (tracers%data(tr_num)%AB_order==2) then - tracers%data(tr_num)%valuesAB(:, n) =-(0.5_WP+epsilon)*tracers%data(tr_num)%valuesold(1, :, n)+(1.5_WP+epsilon)*tracers%data(tr_num)%values(:, n) - elseif (tracers%data(tr_num)%AB_order==3) then - tracers%data(tr_num)%valuesAB(:, n) =5.0_WP*tracers%data(tr_num)%valuesold(2, :, n)-16.0_WP*tracers%data(tr_num)%valuesold(1, :, n)+23.0_WP*tracers%data(tr_num)%values(:, n) - tracers%data(tr_num)%valuesAB(:, n) =tracers%data(tr_num)%valuesAB(:, n)/12.0_WP - end if + do nz = 1, mesh%nl-1 + if (tracers%data(tr_num)%AB_order==2) then + tracers%data(tr_num)%valuesAB(nz, n) =-(0.5_WP+epsilon)*tracers%data(tr_num)%valuesold(1, nz, n)+(1.5_WP+epsilon)*tracers%data(tr_num)%values(nz, n) + elseif (tracers%data(tr_num)%AB_order==3) then + tracers%data(tr_num)%valuesAB(nz, n) =5.0_WP*tracers%data(tr_num)%valuesold(2, nz, n)-16.0_WP*tracers%data(tr_num)%valuesold(1, nz, n)+23.0_WP*tracers%data(tr_num)%values(nz, n) + tracers%data(tr_num)%valuesAB(nz, n) =tracers%data(tr_num)%valuesAB(nz, n)/12.0_WP + end if + end do end do +#ifndef ENABLE_OPENACC !$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif if (tracers%data(tr_num)%AB_order==2) then + +#ifndef ENABLE_OPENACC !$OMP PARALLEL DO +#else +!$ACC parallel loop collapse(2) +#endif do n=1, partit%myDim_nod2d+partit%eDim_nod2D - tracers%data(tr_num)%valuesold(1, :, n)=tracers%data(tr_num)%values(:, n) + do nz = 1, mesh%nl-1 + tracers%data(tr_num)%valuesold(1, nz, n)=tracers%data(tr_num)%values(nz, n) + end do end do +#ifndef ENABLE_OPENACC !$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif + elseif (tracers%data(tr_num)%AB_order==3) then + +#ifndef ENABLE_OPENACC !$OMP PARALLEL DO +#else +!$ACC parallel loop collapse(2) +#endif do n=1, partit%myDim_nod2d+partit%eDim_nod2D - tracers%data(tr_num)%valuesold(2, :, n)=tracers%data(tr_num)%valuesold(1, :, n) - tracers%data(tr_num)%valuesold(1, :, n)=tracers%data(tr_num)%values(:, n) + do nz = 1, mesh%nl-1 + 
tracers%data(tr_num)%valuesold(2, nz, n)=tracers%data(tr_num)%valuesold(1, nz, n) + tracers%data(tr_num)%valuesold(1, nz, n)=tracers%data(tr_num)%values(nz, n) + end do end do +#ifndef ENABLE_OPENACC !$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif end if if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call tracer_gradient_elements'//achar(27)//'[0m' call tracer_gradient_elements(tracers%data(tr_num)%valuesAB, partit, mesh) - call exchange_elem_begin(tr_xy, partit) + call exchange_elem_begin(tr_xy, partit, luse_g2g = .true.) if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call tracer_gradient_z'//achar(27)//'[0m' call tracer_gradient_z(tracers%data(tr_num)%values, partit, mesh) !WHY NOT AB HERE? DSIDOREN! call exchange_elem_end(partit) ! tr_xy used in fill_up_dn_grad !$OMP BARRIER - call exchange_nod_begin(tr_z, partit) ! not used in fill_up_dn_grad + call exchange_nod_begin(tr_z, partit, luse_g2g = .true.) ! not used in fill_up_dn_grad if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call fill_up_dn_grad'//achar(27)//'[0m' call fill_up_dn_grad(tracers%work, partit, mesh) @@ -79,7 +118,7 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) if (flag_debug .and. partit%mype==0) print *, achar(27)//'[38m'//' --> call tracer_gradient_elements'//achar(27)//'[0m' call tracer_gradient_elements(tracers%data(tr_num)%values, partit, mesh) !redefine tr_arr to the current timestep - call exchange_elem(tr_xy, partit) + call exchange_elem(tr_xy, partit, luse_g2g = .true.) END SUBROUTINE init_tracers_AB ! 
@@ -105,7 +144,12 @@ SUBROUTINE tracer_gradient_elements(ttf, partit, mesh) #include "associate_mesh_def.h" #include "associate_part_ass.h" #include "associate_mesh_ass.h" +#ifndef ENABLE_OPENACC !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(elem, elnodes, nz, nzmin, nzmax) +#else +!$ACC UPDATE DEVICE(gradient_sca) +!$ACC parallel loop private(elnodes) +#endif DO elem=1, myDim_elem2D elnodes=elem2D_nodes(:,elem) nzmin = ulevels(elem) @@ -116,7 +160,11 @@ SUBROUTINE tracer_gradient_elements(ttf, partit, mesh) tr_xy(2,nz, elem)=sum(gradient_sca(4:6,elem)*ttf(nz,elnodes)) END DO END DO +#ifndef ENABLE_OPENACC !$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif END SUBROUTINE tracer_gradient_elements ! ! @@ -141,7 +189,12 @@ SUBROUTINE tracer_gradient_z(ttf, partit, mesh) #include "associate_mesh_def.h" #include "associate_part_ass.h" #include "associate_mesh_ass.h" +#ifndef ENABLE_OPENACC !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nz, nzmin, nzmax, dz) +#else +!$ACC UPDATE DEVICE(hnode_new) +!$ACC parallel loop +#endif DO n=1, myDim_nod2D+eDim_nod2D !!PS nlev=nlevels_nod2D(n) nzmax=nlevels_nod2D(n) @@ -156,7 +209,11 @@ SUBROUTINE tracer_gradient_z(ttf, partit, mesh) tr_z(nzmin, n)=0.0_WP tr_z(nzmax, n)=0.0_WP END DO +#ifndef ENABLE_OPENACC !$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif END SUBROUTINE tracer_gradient_z ! ! 
@@ -184,7 +241,12 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh) trarr=>tracers%data(tr_num)%values(:,:) if ((clim_relax>1.0e-8_WP).and.(tracers%data(tr_num)%ID==1)) then +#ifndef ENABLE_OPENACC !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nzmin, nzmax) +#else +!$ACC UPDATE DEVICE(relax2clim, Tclim) +!$ACC parallel loop +#endif DO n=1, myDim_nod2D nzmin = ulevels_nod2D(n) nzmax = nlevels_nod2D(n) @@ -193,17 +255,30 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh) trarr(nzmin:nzmax-1,n)=trarr(nzmin:nzmax-1,n)+& relax2clim(n)*dt*(Tclim(nzmin:nzmax-1,n)-trarr(nzmin:nzmax-1,n)) END DO +#ifndef ENABLE_OPENACC !$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif END if if ((clim_relax>1.0e-8_WP).and.(tracers%data(tr_num)%ID==2)) then +#ifndef ENABLE_OPENACC !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, nzmin, nzmax) +#else +!$ACC UPDATE DEVICE(relax2clim, Sclim) +!$ACC parallel loop +#endif DO n=1, myDim_nod2D nzmin = ulevels_nod2D(n) nzmax = nlevels_nod2D(n) trarr(nzmin:nzmax-1,n)=trarr(nzmin:nzmax-1,n)+& relax2clim(n)*dt*(Sclim(nzmin:nzmax-1,n)-trarr(nzmin:nzmax-1,n)) END DO +#ifndef ENABLE_OPENACC !$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif END IF END SUBROUTINE relax_to_clim END MODULE o_tracers From d1f41bd2f3fd5796b9a43e41cfdc07a0e6cd133d Mon Sep 17 00:00:00 2001 From: seshadri levante Date: Fri, 7 Jun 2024 12:49:15 +0200 Subject: [PATCH 5/5] Updated with Miguel's comment --- src/oce_tracer_mod.F90 | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/oce_tracer_mod.F90 b/src/oce_tracer_mod.F90 index 46ee90440..7bedf0b27 100755 --- a/src/oce_tracer_mod.F90 +++ b/src/oce_tracer_mod.F90 @@ -41,20 +41,17 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) #else !$ACC end parallel loop #endif + + ! 
AB interpolation + if (tracers%data(tr_num)%AB_order==2) then #ifndef ENABLE_OPENACC !$OMP PARALLEL DO #else !$ACC parallel loop collapse(2) #endif do n=1, partit%myDim_nod2D+partit%eDim_nod2D - ! AB interpolation do nz = 1, mesh%nl-1 - if (tracers%data(tr_num)%AB_order==2) then tracers%data(tr_num)%valuesAB(nz, n) =-(0.5_WP+epsilon)*tracers%data(tr_num)%valuesold(1, nz, n)+(1.5_WP+epsilon)*tracers%data(tr_num)%values(nz, n) - elseif (tracers%data(tr_num)%AB_order==3) then - tracers%data(tr_num)%valuesAB(nz, n) =5.0_WP*tracers%data(tr_num)%valuesold(2, nz, n)-16.0_WP*tracers%data(tr_num)%valuesold(1, nz, n)+23.0_WP*tracers%data(tr_num)%values(nz, n) - tracers%data(tr_num)%valuesAB(nz, n) =tracers%data(tr_num)%valuesAB(nz, n)/12.0_WP - end if end do end do #ifndef ENABLE_OPENACC @@ -63,8 +60,27 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) !$ACC end parallel loop #endif - if (tracers%data(tr_num)%AB_order==2) then + ! AB interpolation contd + elseif (tracers%data(tr_num)%AB_order==3) then +#ifndef ENABLE_OPENACC +!$OMP PARALLEL DO +#else +!$ACC parallel loop collapse(2) +#endif + do n=1, partit%myDim_nod2D+partit%eDim_nod2D + do nz = 1, mesh%nl-1 + tracers%data(tr_num)%valuesAB(nz, n) =5.0_WP*tracers%data(tr_num)%valuesold(2, nz, n)-16.0_WP*tracers%data(tr_num)%valuesold(1, nz, n)+23.0_WP*tracers%data(tr_num)%values(nz, n) + tracers%data(tr_num)%valuesAB(nz, n) =tracers%data(tr_num)%valuesAB(nz, n)/12.0_WP + end do + end do +#ifndef ENABLE_OPENACC +!$OMP END PARALLEL DO +#else +!$ACC end parallel loop +#endif +end if + if (tracers%data(tr_num)%AB_order==2) then #ifndef ENABLE_OPENACC !$OMP PARALLEL DO #else @@ -82,7 +98,6 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) #endif elseif (tracers%data(tr_num)%AB_order==3) then - #ifndef ENABLE_OPENACC !$OMP PARALLEL DO #else