Skip to content

Commit

Permalink
CUDA: Owned fields now allocated in pinned host memory
Browse files Browse the repository at this point in the history
  • Loading branch information
awnawab committed Sep 6, 2023
1 parent 389523a commit 10323f1
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Features of FIELD_API can be toggled by passing the following argument to the CM
| TESTS | ON | Build the testing suite. |
| BUDDY_MALLOC | ON | Enable the use of a binary buddy memory allocator for the shadow host allocation for `FIELD%DEVPTR`. This option is switched off if CUDA is enabled.|
| ACC | ON | Enable the use of OpenACC for GPU offload. |
| CUDA | OFF | Enable the use of CUDA for GPU offload. Disables the use of the buddy memory allocator and removes the shadow host allocation for `FIELD%DEVPTR`.|
| CUDA | OFF | Enable the use of CUDA for GPU offload. Disables the use of the buddy memory allocator, removes the shadow host allocation for `FIELD%DEVPTR` and allocates owned fields (see below) in pinned (page-locked) host memory.|

## Supported compilers
The library has been tested with the nvhpc toolkit from Nvidia, version 23.3
Expand Down
33 changes: 33 additions & 0 deletions field_RANKSUFF_module.fypp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ PUBLIC :: ${ftn}$_WRAPPER

TYPE, EXTENDS(${ftn}$) :: ${ftn}$_OWNER
INTEGER(KIND=JPIM) :: LBOUNDS(${ft.rank}$), UBOUNDS(${ft.rank}$)
#:if defined('CUDA')
${ft.type}$, PINNED, ALLOCATABLE, PRIVATE :: DATA(${ft.shape}$)
#:endif
CONTAINS
PROCEDURE :: INIT => ${ftn}$_OWNER_INIT
PROCEDURE :: FINAL => ${ftn}$_OWNER_FINAL
Expand Down Expand Up @@ -175,15 +178,36 @@ CONTAINS

SELF%ISTATUS = UNALLOCATED
IF(.NOT. IS_DELAYED) THEN
#:if defined('CUDA')
CALL SELF%ALLOCATE(SELF%DATA)
#:else
CALL SELF%ALLOCATE()
#:endif
ENDIF
END SUBROUTINE ${ftn}$_OWNER_INIT

#:if defined('CUDA')
SUBROUTINE ${ftn}$_ALLOCATE (SELF, DATA)
#:else
SUBROUTINE ${ftn}$_ALLOCATE (SELF)
#:endif
  ! Create FIELD object by explicitly allocating new data.
  !
  ! SELF : owner field whose LBOUNDS/UBOUNDS give the extent to allocate;
  !        on return SELF%PTR refers to the new storage and SELF%ISTATUS
  !        is set to NHSTFRESH.
  ! DATA : (CUDA builds only) allocatable array that receives the storage.
  !        It is allocated here with the PINNED attribute and SELF%PTR is
  !        pointed at it.
  CLASS(${ftn}$_OWNER) :: SELF
#:if defined('CUDA')
  ${ft.type}$, PINNED, ALLOCATABLE, TARGET :: DATA(${ft.shape}$)
  INTEGER :: ISTAT
  LOGICAL :: PLOG

  ALLOCATE(DATA(${', '.join(map(lambda r: 'SELF%LBOUNDS('+str(r+1)+'):SELF%UBOUNDS('+str(r+1)+')', range(0, ft.rank)))}$), STAT=ISTAT, PINNED=PLOG)

  ! With STAT= present a failed allocation no longer aborts by itself, so
  ! stop here rather than associate SELF%PTR with an unallocated array.
  ! This matches the non-CUDA branch, whose ALLOCATE has no STAT=.
  IF(ISTAT /= 0)THEN
    PRINT *, "Failed to allocate host memory for ${ftn}$_OWNER"
    ERROR STOP 1
  ENDIF

  ! Bounds remapping keeps the lower bounds requested by the owner.
  SELF%PTR(${', '.join(map(lambda r: 'SELF%LBOUNDS('+str(r+1)+'):', range(0, ft.rank)))}$) => DATA

  ! PLOG is the PINNED= result of the ALLOCATE above; a false value is
  ! only reported, not treated as fatal.
  IF(.NOT. PLOG)THEN
    PRINT *, "Failed to allocate page-locked memory for ${ftn}$_OWNER"
  ENDIF
#:else

  ALLOCATE(SELF%PTR(${', '.join(map(lambda r: 'SELF%LBOUNDS('+str(r+1)+'):SELF%UBOUNDS('+str(r+1)+')', range(0, ft.rank)))}$))
#:endif
  SELF%ISTATUS = NHSTFRESH
END SUBROUTINE ${ftn}$_ALLOCATE

Expand Down Expand Up @@ -234,7 +258,12 @@ CONTAINS
! Finalizes field and deallocates owned data
CLASS(${ftn}$_OWNER) :: SELF
IF (ASSOCIATED(SELF%PTR)) THEN
#:if defined('CUDA')
DEALLOCATE(SELF%DATA)
NULLIFY(SELF%PTR)
#:else
DEALLOCATE(SELF%PTR)
#:endif
END IF
CALL SELF%${ftn}$_FINAL
END SUBROUTINE ${ftn}$_OWNER_FINAL
Expand Down Expand Up @@ -404,7 +433,11 @@ CONTAINS
INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE

IF(SELF%ISTATUS==UNALLOCATED)THEN
#:if defined('CUDA')
CALL SELF%ALLOCATE(SELF%DATA)
#:else
CALL SELF%ALLOCATE()
#:endif
ENDIF
CALL SELF%${ftn}$_GET_HOST_DATA(MODE, PTR, QUEUE)

Expand Down
11 changes: 9 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ecbuild_add_test(
OpenMP::OpenMP_Fortran
$<${HAVE_ACC}:OpenACC::OpenACC_Fortran>
)
target_link_options( main.x PRIVATE $<${HAVE_CUDA}:-cuda> )
target_link_options( main.x PRIVATE $<${HAVE_CUDA}:-cuda;-gpu=pinned> )
target_compile_definitions( main.x PRIVATE $<${HAVE_CUDA}:_CUDA> )

## Unit tests
Expand Down Expand Up @@ -62,7 +62,7 @@ foreach(TEST_FILE ${TEST_FILES})
PROPERTIES Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/include/tests
)

target_link_options( ${TEST_NAME}.x PRIVATE $<${HAVE_CUDA}:-cuda> )
target_link_options( ${TEST_NAME}.x PRIVATE $<${HAVE_CUDA}:-cuda;-gpu=pinned> )
target_compile_definitions( ${TEST_NAME}.x PRIVATE $<${HAVE_CUDA}:_CUDA> )
endforeach()

Expand All @@ -76,4 +76,11 @@ ecbuild_add_test(
parkind_sp
OpenMP::OpenMP_Fortran
)
# don't need gpu=pinned link flag because this test only has wrapper fields
target_link_options( init_wrapper_mixed_precision.x PRIVATE $<${HAVE_CUDA}:-cuda> )

## Host pinning test
# Registered only for CUDA builds. The test runs the test_host_pinning.cmake
# script in CMake script mode (-P) from the tests directory of the build tree.
if( HAVE_CUDA )
  add_test(
    NAME cuda_host_pinning_test
    COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/test_host_pinning.cmake"
    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/tests
  )
endif()
4 changes: 4 additions & 0 deletions tests/init_owner.F90
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ PROGRAM INIT_OWNER
CLASS(FIELD_2RB), POINTER :: O => NULL()
REAL(KIND=JPRB), POINTER :: PTR(:,:)

#ifdef _CUDA
PRINT *, "Initialize stdout for CUDA host pinning test"
#endif

CALL FIELD_NEW(O, LBOUNDS=[10,1], UBOUNDS=[21,11])
CALL O%GET_HOST_DATA_RDWR(PTR)
PTR=42
Expand Down
22 changes: 22 additions & 0 deletions tests/test_host_pinning.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# (C) Copyright 2022- ECMWF.
# (C) Copyright 2022- Meteo-France.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Run the owner-field unit test and capture everything it writes to stdout.
# No WORKING_DIRECTORY is given, so the relative path is resolved against the
# directory this script is launched from.
execute_process(COMMAND "./init_owner.x"
                RESULT_VARIABLE EXIT_CODE
                OUTPUT_VARIABLE STDOUT
                COMMAND_ERROR_IS_FATAL ANY)

# Fail the test if the executable did not exit cleanly.
if( NOT EXIT_CODE EQUAL 0 )
  message(FATAL_ERROR "init_owner unit-test failed")
endif()

# Search the captured output for the pinned-allocation failure message.
# STDOUT must be quoted: expanded unquoted, empty output removes the argument
# altogether and output containing ';' splits into several arguments, and in
# both cases string(FIND) is called with the wrong number of arguments.
string(FIND "${STDOUT}" "Failed to allocate page-locked memory" RESULT)
if( NOT RESULT EQUAL -1 )
  message(FATAL_ERROR "Failed to allocate page-locked memory")
endif()

0 comments on commit 10323f1

Please sign in to comment.