From 2e132b719ceff07c601857c087cff96e74cf914a Mon Sep 17 00:00:00 2001 From: suvarchal Date: Fri, 24 Mar 2023 02:24:19 +0100 Subject: [PATCH 1/7] add a template to add different compilers for same machine through changes for levante: different shell envs for levante for different compilers can now be used as configure.sh levante.nvhpc instead of replacing env/shell --- env.sh | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/env.sh b/env.sh index 950052e68..bb07fd886 100755 --- a/env.sh +++ b/env.sh @@ -27,8 +27,13 @@ fi if [[ $LOGINHOST =~ ^m[A-Za-z0-9]+\.hpc\.dkrz\.de$ ]]; then STRATEGY="mistral.dkrz.de" -elif [[ $LOGINHOST =~ ^l[A-Za-z0-9]+\.lvt\.dkrz\.de$ ]]; then +elif [[ $LOGINHOST =~ ^levante ]] || [[ $LOGINHOST =~ ^l[:alnum:]+\.lvt\.dkrz\.de$ ]]; then STRATEGY="levante.dkrz.de" + # following regex only matches if input is 2 word like levante.nvhpc + compid_regex="^([[:alnum:]]+)\.([[:alnum:]]+)$" + if [[ $LOGINHOST =~ $compid_regex ]]; then + COMPILERID="${BASH_REMATCH[2]}" + fi elif [[ $LOGINHOST =~ ^ollie[0-9]$ ]] || [[ $LOGINHOST =~ ^prod-[0-9]{4}$ ]]; then STRATEGY="ollie" elif [[ $LOGINHOST =~ ^albedo[0-9]$ ]] || [[ $LOGINHOST =~ ^prod-[0-9]{4}$ ]]; then @@ -74,10 +79,18 @@ fi DIR="$( cd "$( dirname "${SOURCE}" )" && pwd )" if [ $BEING_EXECUTED = true ]; then - # file is being executed + # file is being executed, why is this here? echo $DIR/env/$STRATEGY else # file is being sourced export FESOM_PLATFORM_STRATEGY=$STRATEGY - source $DIR/env/$STRATEGY/shell + SHELLFILE="${DIR}/env/${STRATEGY}/shell" + if [[ -n ${COMPILERID} ]]; then + SHELLFILE="${SHELLFILE}.${COMPILERID}" + fi + if [[ ! -e ${SHELLFILE} ]]; then + echo "Shell file for ${LOGINHOST} doesnt exist: "$SHELLFILE + exit 1 + fi + source $SHELLFILE fi From 1d4e0166506a0b77767e5daa00526bac9b9baf83 Mon Sep 17 00:00:00 2001 From: suvarchal Date: Fri, 24 Mar 2023 02:25:47 +0100 Subject: [PATCH 2/7] make shell.intel as default on levante it can also be used as ./configure levante.intel --- env/levante.dkrz.de/shell | 38 +-------------------------------- env/levante.dkrz.de/shell.intel | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 37 deletions(-) mode change 100755 => 120000 env/levante.dkrz.de/shell create mode 100755 env/levante.dkrz.de/shell.intel diff --git a/env/levante.dkrz.de/shell b/env/levante.dkrz.de/shell deleted file mode 100755 index d9327301d..000000000 --- a/env/levante.dkrz.de/shell +++ /dev/null @@ -1,37 +0,0 @@ -# make the contents as shell agnostic as possible so we can include them with bash, zsh and others -export LC_ALL=en_US.UTF-8 -export CPU_MODEL=AMD_EPYC_ZEN3 - -module load intel-oneapi-compilers/2022.0.1-gcc-11.2.0 -module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 -module load openmpi/4.1.2-intel-2021.5.0 -export FC=mpif90 CC=mpicc CXX=mpicxx ; -export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH - -export lib_multio=/home/a/a270029/fesom2.0/lib/multio/lib/lib64/ -export inc_multio=/home/a/a270029/fesom2.0/lib/multio/build/module/ -export lib_eccodes=/sw/spack-levante/eccodes-2.21.0-3ehkbb/lib64/ - -module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 -module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 -module load git # to be able to determine the fesom git SHA when compiling - -ulimit -s unlimited # without setting the stack size we get a segfault from the levante netcdf library at runtime -ulimit -c 0 # do not create a coredump after a crash - -# environment for Open MPI 4.0.0 and later from https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html -export OMPI_MCA_pml="ucx" -export OMPI_MCA_btl=self -export OMPI_MCA_osc="pt2pt" -export UCX_IB_ADDR_TYPE=ib_global -# for most runs one may or may not want to disable HCOLL -export OMPI_MCA_coll="^ml,hcoll" -export OMPI_MCA_coll_hcoll_enable="0" -export HCOLL_ENABLE_MCAST_ALL="0" -export HCOLL_MAIN_IB=mlx5_0:1 -export UCX_NET_DEVICES=mlx5_0:1 -export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self -export UCX_UNIFIED_MODE=y -export HDF5_USE_FILE_LOCKING=FALSE -export OMPI_MCA_io="romio321" -export UCX_HANDLE_ERRORS=bt diff --git a/env/levante.dkrz.de/shell b/env/levante.dkrz.de/shell new file mode 120000 index 000000000..e863d83d3 --- /dev/null +++ b/env/levante.dkrz.de/shell @@ -0,0 +1 @@ +shell.intel \ No newline at end of file diff --git a/env/levante.dkrz.de/shell.intel b/env/levante.dkrz.de/shell.intel new file mode 100755 index 000000000..d9327301d --- /dev/null +++ b/env/levante.dkrz.de/shell.intel @@ -0,0 +1,37 @@ +# make the contents as shell agnostic as possible so we can include them with bash, zsh and others +export LC_ALL=en_US.UTF-8 +export CPU_MODEL=AMD_EPYC_ZEN3 + +module load intel-oneapi-compilers/2022.0.1-gcc-11.2.0 +module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 +module load openmpi/4.1.2-intel-2021.5.0 +export FC=mpif90 CC=mpicc CXX=mpicxx ; +export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH + +export lib_multio=/home/a/a270029/fesom2.0/lib/multio/lib/lib64/ +export inc_multio=/home/a/a270029/fesom2.0/lib/multio/build/module/ +export lib_eccodes=/sw/spack-levante/eccodes-2.21.0-3ehkbb/lib64/ + +module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 +module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 +module load git # to be able to determine the fesom git SHA when compiling + +ulimit -s unlimited # without setting the stack size we get a segfault from the levante netcdf library at runtime +ulimit -c 0 # do not create a coredump after a crash + +# environment for Open MPI 4.0.0 and later from https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html +export OMPI_MCA_pml="ucx" +export OMPI_MCA_btl=self +export OMPI_MCA_osc="pt2pt" +export UCX_IB_ADDR_TYPE=ib_global +# for most runs one may or may not want to disable HCOLL +export OMPI_MCA_coll="^ml,hcoll" +export OMPI_MCA_coll_hcoll_enable="0" +export HCOLL_ENABLE_MCAST_ALL="0" +export HCOLL_MAIN_IB=mlx5_0:1 +export UCX_NET_DEVICES=mlx5_0:1 +export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self +export UCX_UNIFIED_MODE=y +export HDF5_USE_FILE_LOCKING=FALSE +export OMPI_MCA_io="romio321" +export UCX_HANDLE_ERRORS=bt From 41b00a6b4c102020bd0583086ff6d4f5f119a03a Mon Sep 17 00:00:00 2001 From: suvarchal Date: Fri, 24 Mar 2023 02:31:55 +0100 Subject: [PATCH 3/7] remove multio options --- env/levante.dkrz.de/shell.intel | 4 ---- 1 file changed, 4 deletions(-) diff --git a/env/levante.dkrz.de/shell.intel b/env/levante.dkrz.de/shell.intel index d9327301d..c303251dc 100755 --- a/env/levante.dkrz.de/shell.intel +++ b/env/levante.dkrz.de/shell.intel @@ -8,10 +8,6 @@ module load openmpi/4.1.2-intel-2021.5.0 export FC=mpif90 CC=mpicc CXX=mpicxx ; export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH -export lib_multio=/home/a/a270029/fesom2.0/lib/multio/lib/lib64/ -export inc_multio=/home/a/a270029/fesom2.0/lib/multio/build/module/ -export lib_eccodes=/sw/spack-levante/eccodes-2.21.0-3ehkbb/lib64/ - module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 module load git # to be able to determine the fesom git SHA when compiling From 169c34033536bdfbd2d4e32810e9e8782efe37e0 Mon Sep 17 00:00:00 2001 From: suvarchal Date: Fri, 24 Mar 2023 02:35:00 +0100 Subject: [PATCH 4/7] add intel mkl elegantly then using hard coded path in LD_LIBRARY_PATH, wonder why module can't do it --- env/levante.dkrz.de/shell.intel | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/env/levante.dkrz.de/shell.intel b/env/levante.dkrz.de/shell.intel index c303251dc..26b983d9e 100755 --- a/env/levante.dkrz.de/shell.intel +++ b/env/levante.dkrz.de/shell.intel @@ -6,7 +6,8 @@ module load intel-oneapi-compilers/2022.0.1-gcc-11.2.0 module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 module load openmpi/4.1.2-intel-2021.5.0 export FC=mpif90 CC=mpicc CXX=mpicxx ; -export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH +spack load intel-oneapi-mkl@2022.0.1%gcc@11.2.0 # this handles adding to path elegantly then using hardcoded path below +#export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 From f724749ea586dfe743940bc5c45245d1f811bdc1 Mon Sep 17 00:00:00 2001 From: suvarchal Date: Fri, 24 Mar 2023 02:35:47 +0100 Subject: [PATCH 5/7] remove redundant module load as spack loads mkl/blas --- env/levante.dkrz.de/shell.intel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env/levante.dkrz.de/shell.intel b/env/levante.dkrz.de/shell.intel index 26b983d9e..ea9936da7 100755 --- a/env/levante.dkrz.de/shell.intel +++ b/env/levante.dkrz.de/shell.intel @@ -3,10 +3,10 @@ export LC_ALL=en_US.UTF-8 export CPU_MODEL=AMD_EPYC_ZEN3 module load intel-oneapi-compilers/2022.0.1-gcc-11.2.0 -module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 module load openmpi/4.1.2-intel-2021.5.0 export FC=mpif90 CC=mpicc CXX=mpicxx ; spack load intel-oneapi-mkl@2022.0.1%gcc@11.2.0 # this handles adding to path elegantly then using hardcoded path below +#module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 #export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 From da62a3ea0d57d01f1379f170217f5161718742d8 Mon Sep 17 00:00:00 2001 From: suvarchal Date: Fri, 24 Mar 2023 02:57:28 +0100 Subject: [PATCH 6/7] add compilation using gnu compiler on levante --- env/levante.dkrz.de/shell.gnu | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100755 env/levante.dkrz.de/shell.gnu diff --git a/env/levante.dkrz.de/shell.gnu b/env/levante.dkrz.de/shell.gnu new file mode 100755 index 000000000..15dabc6b2 --- /dev/null +++ b/env/levante.dkrz.de/shell.gnu @@ -0,0 +1,40 @@ +# make the contents as shell agnostic as possible so we can include them with bash, zsh and others +export LC_ALL=en_US.UTF-8 + +module load git +module load gcc/11.2.0-gcc-11.2.0 + +# both mpi below work +#module load intel-oneapi-mpi/2021.5.0-gcc-11.2.0 +module load openmpi/4.1.2-gcc-11.2.0 + +# both below work not sure whats the diff? +module load netcdf-c/4.8.1-intel-oneapi-mpi-2021.5.0-gcc-11.2.0 +module load netcdf-fortran/4.5.3-intel-oneapi-mpi-2021.5.0-gcc-11.2.0 + +#module load netcdf-c/4.8.1-gcc-11.2.0 +#module load netcdf-fortran/4.5.3-gcc-11.2.0 + + +export FC=mpif90 CC=mpicc CXX=mpicxx + +#module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 +# so use the LD_LIBRARY_PATH or other paths like prefix paths etc for cmake +#export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH +spack load intel-oneapi-mkl@2022.0.1%gcc@11.2.0 + +export OMPI_MCA_pml="ucx" +export OMPI_MCA_btl=self +export OMPI_MCA_osc="pt2pt" +export UCX_IB_ADDR_TYPE=ib_global +# for most runs one may or may not want to disable HCOLL +export OMPI_MCA_coll="^ml,hcoll" +export OMPI_MCA_coll_hcoll_enable="0" +export HCOLL_ENABLE_MCAST_ALL="0" +export HCOLL_MAIN_IB=mlx5_0:1 +export UCX_NET_DEVICES=mlx5_0:1 +export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self +export UCX_UNIFIED_MODE=y +export HDF5_USE_FILE_LOCKING=FALSE +export OMPI_MCA_io="romio321" +export UCX_HANDLE_ERRORS=bt From 169263466c38683d3937f660e79299da1868eb0a Mon Sep 17 00:00:00 2001 From: suvarchal Date: Fri, 24 Mar 2023 23:05:43 +0100 Subject: [PATCH 7/7] add feature to configure to be able to set fesom's build time options (in CMakeLists) and cmake in commandline configure.sh --- configure.sh | 4 +++- env.sh | 11 ++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/configure.sh b/configure.sh index 0e957b8f5..86d7eb02a 100755 --- a/configure.sh +++ b/configure.sh @@ -5,5 +5,7 @@ set -e source env.sh # source this from your run script too mkdir build || true # make sure not to commit this to svn or git cd build -cmake .. -DCMAKE_BUILD_TYPE=Debug # not required when re-compiling +cmake .. $@ -DCMAKE_BUILD_TYPE=Debug # not required when re-compiling + # additional cmake arguments can be passed to configure.sh + # this also includes fesom specific options in CMakeLists, can be used as -DFESOM_COUPLED=ON make install -j`nproc --all` diff --git a/env.sh b/env.sh index bb07fd886..5e7dc0191 100755 --- a/env.sh +++ b/env.sh @@ -17,19 +17,20 @@ else BEING_EXECUTED=false fi -# if an arg is given, use it as hostname -if [ -z "$1" ]; then +# if an arg is given and doesn't start with - use it as hostname, arguments with - are passed on to cmake +if [[ ! -z "$1" ]] && [[ ! "$1" = ^- ]]; then + LOGINHOST=$1 # arg exists and doesn't start with - + shift # pop the argument as we already stored it +else # no argument given LOGINHOST="$(hostname -f)" -else - LOGINHOST=$1 fi if [[ $LOGINHOST =~ ^m[A-Za-z0-9]+\.hpc\.dkrz\.de$ ]]; then STRATEGY="mistral.dkrz.de" elif [[ $LOGINHOST =~ ^levante ]] || [[ $LOGINHOST =~ ^l[:alnum:]+\.lvt\.dkrz\.de$ ]]; then STRATEGY="levante.dkrz.de" - # following regex only matches if input is 2 word like levante.nvhpc + # following regex only matches if input is 2 word like levante.nvhpc, this enables using different shells for a machine directly compid_regex="^([[:alnum:]]+)\.([[:alnum:]]+)$" if [[ $LOGINHOST =~ $compid_regex ]]; then COMPILERID="${BASH_REMATCH[2]}"