Skip to content

Commit

Permalink
Added support for partial field offload to device
Browse files Browse the repository at this point in the history
  • Loading branch information
wertysas committed Dec 5, 2024
1 parent b5a9278 commit aef8344
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 31 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ endforeach ()
unset( non_ranksuff_srcs )
list ( APPEND non_ranksuff_srcs dev_alloc_module field_factory_module field_access_module field_array_module field_module
field_shuffle_module field_util_module field_array_util_module field_shuffle_type_module host_alloc_module
field_gathscat_module field_gathscat_type_module)
field_gathscat_module field_gathscat_type_module field_data_module)
if(HAVE_FIELD_GANG)
list( APPEND non_ranksuff_srcs field_gang_module )
endif()
Expand Down
8 changes: 4 additions & 4 deletions field_RANKSUFF_data_module.fypp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ PRIVATE
#:for ft in fieldTypeList
#:set ftn = ft.name

PUBLIC :: ${ftn}$_COPY
PUBLIC :: ${ftn}$_COPY_ARRAY
PUBLIC :: ${ftn}$_COPY_FUNC
PUBLIC :: ${ftn}$_COPY_INTF

Expand Down Expand Up @@ -92,7 +92,7 @@ CONTAINS

END FUNCTION

SUBROUTINE ${ftn}$_COPY (HST, DEV, MAP_DEVPTR, KDIR, QUEUE)
SUBROUTINE ${ftn}$_COPY_ARRAY (HST, DEV, MAP_DEVPTR, KDIR, QUEUE)

USE FIELD_ABORT_MODULE

Expand All @@ -101,9 +101,9 @@ CONTAINS
INTEGER (KIND=JPIM), INTENT (IN) :: KDIR
INTEGER (KIND=JPIM), OPTIONAL, INTENT (IN) :: QUEUE

PROCEDURE (${ftn}$_COPY_INTF), POINTER :: FUNC
PROCEDURE (${ftn}$_COPY_INTF), POINTER :: FUNC

FUNC => ${ftn}$_COPY_FUNC (HST, DEV)
FUNC => ${ftn}$_COPY_FUNC (HST, DEV)

CALL FUNC (HST, DEV, MAP_DEVPTR, KDIR, QUEUE)

Expand Down
3 changes: 2 additions & 1 deletion field_RANKSUFF_gang_module.fypp
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,12 @@ CONTAINS
END SUBROUTINE

#:for what in ['HOST', 'DEVICE']
SUBROUTINE ${ftn1}$_GET_${what}$_DATA_WRAPPER_HELPER (SELF, MODE, PTR, QUEUE)
SUBROUTINE ${ftn1}$_GET_${what}$_DATA_WRAPPER_HELPER (SELF, MODE, PTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn1}$_WRAPPER_HELPER) :: SELF
INTEGER (KIND=JPIM), INTENT(IN) :: MODE
${ft1.type}$, POINTER, INTENT(INOUT) :: PTR(${ft1.shape}$)
INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)
INTEGER(KIND=JPIM) :: LBOUNDS(${ft1.rank}$)

IF (ASSOCIATED (SELF%PARENT)) THEN
Expand Down
75 changes: 50 additions & 25 deletions field_RANKSUFF_module.fypp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ USE HOST_ALLOC_MODULE
USE FIELD_BASIC_MODULE
USE FIELD_CONSTANTS_MODULE
USE FIELD_DEFAULTS_MODULE
USE FIELD_DATA_MODULE
#:if defined('CUDA')
USE CUDAFOR
#:endif
Expand All @@ -31,6 +32,7 @@ ${fieldType.useParkind1 ()}$
#:set ftn = ft.name
USE FIELD_${RANK}$${SUFF}$_DATA_MODULE, ONLY : ${ftn}$_COPY_INTF
#:endfor
USE FIELD_DATA_MODULE, ONLY : COPY_ARRAY

IMPLICIT NONE

Expand Down Expand Up @@ -426,17 +428,32 @@ CONTAINS

END SUBROUTINE ${ftn}$_WIPE_OBJECT

SUBROUTINE ${ftn}$_COPY_DATA (SELF, KDIR, QUEUE)
SUBROUTINE ${ftn}$_COPY_DATA (SELF, KDIR, QUEUE, BLK_BOUNDS)

USE FIELD_ABORT_MODULE

CLASS(${ftn}$) :: SELF
INTEGER (KIND=JPIM), INTENT(IN) :: KDIR
INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

INTEGER(KIND=JPIM) :: LB, UB
${ft.type}$, POINTER :: TMP_PTR(${ft.shape}$) => NULL()
REAL :: START, FINISH

CALL CPU_TIME(START)
CALL SELF%COPY_FUNC (SELF%PTR, SELF%DEVPTR, SELF%MAP_DEVPTR, KDIR, QUEUE)
CALL CPU_TIME(FINISH)


IF ( .NOT. PRESENT(BLK_BOUNDS) ) THEN
CALL CPU_TIME(START)
CALL SELF%COPY_FUNC (SELF%PTR, SELF%DEVPTR, SELF%MAP_DEVPTR, KDIR, QUEUE)
CALL CPU_TIME(FINISH)
ELSE
LB = LBOUND(SELF%PTR, ${ft.rank}$)
UB = UBOUND(SELF%PTR, ${ft.rank}$)
IF ( BLK_BOUNDS(1) < LB .OR. BLK_BOUNDS(2) > UB ) THEN
CALL FIELD_ABORT("BLOCK DIMENSIONS ARE OUR OF RANGE")
END IF
TMP_PTR => SELF%PTR(${':,' * (ft.rank-1) + 'BLK_BOUNDS(1):BLK_BOUNDS(2)'}$)
CALL COPY_ARRAY(TMP_PTR, SELF%DEVPTR, SELF%MAP_DEVPTR, KDIR, QUEUE)
END IF
IF (KDIR == NH2D) THEN
CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH)
ELSE IF (KDIR == ND2H) THEN
Expand All @@ -445,12 +462,12 @@ CONTAINS

END SUBROUTINE ${ftn}$_COPY_DATA

SUBROUTINE ${ftn}$_GET_HOST_DATA (SELF, MODE, PTR, QUEUE)
SUBROUTINE ${ftn}$_GET_HOST_DATA (SELF, MODE, PTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$) :: SELF
INTEGER (KIND=JPIM), INTENT(IN) :: MODE

${ft.type}$, POINTER, INTENT(INOUT) :: PTR(${ft.shape}$)
INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

INTEGER(KIND=JPIM) :: LBOUNDS(${ft.rank}$)

Expand All @@ -466,11 +483,12 @@ CONTAINS

END SUBROUTINE ${ftn}$_GET_HOST_DATA

SUBROUTINE ${ftn}$_OWNER_GET_HOST_DATA (SELF, MODE, PTR, QUEUE)
SUBROUTINE ${ftn}$_OWNER_GET_HOST_DATA (SELF, MODE, PTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$_OWNER) :: SELF
INTEGER (KIND=JPIM), INTENT(IN) :: MODE
${ft.type}$, POINTER, INTENT(INOUT) :: PTR(${ft.shape}$)
INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

IF(SELF%GET_STATUS ()==UNALLOCATED)THEN
CALL SELF%CREATE_HOST_DATA ()
Expand All @@ -479,16 +497,17 @@ CONTAINS
CALL SELF%SET_STATUS (NHSTFRESH)
ENDIF
ENDIF
CALL SELF%${ftn}$_GET_HOST_DATA(MODE, PTR, QUEUE)
CALL SELF%${ftn}$_GET_HOST_DATA(MODE, PTR, QUEUE, BLK_BOUNDS)

END SUBROUTINE ${ftn}$_OWNER_GET_HOST_DATA

SUBROUTINE ${ftn}$_GET_HOST_DATA_RDONLY (SELF, PPTR, QUEUE)
SUBROUTINE ${ftn}$_GET_HOST_DATA_RDONLY (SELF, PPTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$) :: SELF
${ft.type}$, POINTER, INTENT(INOUT) :: PPTR(${ft.shape}$)
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

CALL SELF%GET_HOST_DATA (NRD, PPTR, QUEUE)
CALL SELF%GET_HOST_DATA (NRD, PPTR, QUEUE, BLK_BOUNDS)

END SUBROUTINE ${ftn}$_GET_HOST_DATA_RDONLY

Expand All @@ -501,12 +520,13 @@ CONTAINS

END SUBROUTINE ${ftn}$_SYNC_HOST_RDONLY

SUBROUTINE ${ftn}$_GET_HOST_DATA_RDWR (SELF, PPTR, QUEUE)
SUBROUTINE ${ftn}$_GET_HOST_DATA_RDWR (SELF, PPTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$) :: SELF
${ft.type}$, POINTER, INTENT(INOUT) :: PPTR(${ft.shape}$)
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

CALL SELF%GET_HOST_DATA (IOR (NRD, NWR), PPTR, QUEUE)
CALL SELF%GET_HOST_DATA (IOR (NRD, NWR), PPTR, QUEUE, BLK_BOUNDS)

END SUBROUTINE ${ftn}$_GET_HOST_DATA_RDWR

Expand All @@ -525,19 +545,21 @@ CONTAINS
CALL DEV_ALLOCATE_HST (DEV=SELF%DEVPTR, HST=SELF%PTR, MAP_DEVPTR=SELF%MAP_DEVPTR)
END SUBROUTINE

SUBROUTINE ${ftn}$_GET_DEVICE_DATA (SELF, MODE, PTR, QUEUE)
SUBROUTINE ${ftn}$_GET_DEVICE_DATA (SELF, MODE, PTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$) :: SELF
INTEGER (KIND=JPIM), INTENT(IN) :: MODE
${ft.type}$, POINTER, INTENT(INOUT) :: PTR(${ft.shape}$)
INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

INTEGER(KIND=JPIM) :: LBOUNDS(${ft.rank}$)

LBOUNDS=LBOUND(SELF%PTR)
IF (.NOT. ASSOCIATED (SELF%DEVPTR)) THEN
CALL SELF%CREATE_DEVICE_DATA
ENDIF
IF (IAND (SELF%GET_STATUS (), NDEVFRESH) == 0) THEN
CALL SELF%COPY_DATA (NH2D, QUEUE)
CALL SELF%COPY_DATA (NH2D, QUEUE, BLK_BOUNDS=BLK_BOUNDS)
CALL SELF%SET_STATUS (IOR (SELF%GET_STATUS (), NDEVFRESH))
ENDIF
PTR (${ft.lbptr}$) => SELF%DEVPTR (${','.join(':' for _ in range(ft.rank))}$)
Expand Down Expand Up @@ -573,12 +595,12 @@ CONTAINS

END FUNCTION
#:endif

SUBROUTINE ${ftn}$_OWNER_GET_DEVICE_DATA (SELF, MODE, PTR, QUEUE)
SUBROUTINE ${ftn}$_OWNER_GET_DEVICE_DATA (SELF, MODE, PTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$_OWNER) :: SELF
INTEGER (KIND=JPIM), INTENT(IN) :: MODE
${ft.type}$, POINTER, INTENT(INOUT) :: PTR(${ft.shape}$)
INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

IF(SELF%GET_STATUS ()==UNALLOCATED)THEN
CALL SELF%CREATE_HOST_DATA ()
Expand All @@ -588,23 +610,25 @@ CONTAINS
CALL SELF%SET_STATUS (NHSTFRESH)
ENDIF
ENDIF
CALL SELF%${ftn}$_GET_DEVICE_DATA(MODE, PTR, QUEUE)
CALL SELF%${ftn}$_GET_DEVICE_DATA(MODE, PTR, QUEUE, BLK_BOUNDS)

END SUBROUTINE ${ftn}$_OWNER_GET_DEVICE_DATA

SUBROUTINE ${ftn}$_GET_DEVICE_DATA_RDONLY (SELF, PPTR, QUEUE)
SUBROUTINE ${ftn}$_GET_DEVICE_DATA_RDONLY (SELF, PPTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$) :: SELF
${ft.type}$, POINTER, INTENT(INOUT) :: PPTR(${ft.shape}$)
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

CALL SELF%GET_DEVICE_DATA (NRD, PPTR, QUEUE)

END SUBROUTINE ${ftn}$_GET_DEVICE_DATA_RDONLY

SUBROUTINE ${ftn}$_GET_DEVICE_DATA_WRONLY (SELF, PPTR, QUEUE)
SUBROUTINE ${ftn}$_GET_DEVICE_DATA_WRONLY (SELF, PPTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$) :: SELF
${ft.type}$, POINTER, INTENT(INOUT) :: PPTR(${ft.shape}$)
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

CALL SELF%SET_STATUS (IOR (SELF%GET_STATUS (), NDEVFRESH))
CALL SELF%GET_DEVICE_DATA (NWR, PPTR, QUEUE)
Expand All @@ -620,12 +644,13 @@ CONTAINS

END SUBROUTINE ${ftn}$_SYNC_DEVICE_RDONLY

SUBROUTINE ${ftn}$_GET_DEVICE_DATA_RDWR (SELF, PPTR, QUEUE)
SUBROUTINE ${ftn}$_GET_DEVICE_DATA_RDWR (SELF, PPTR, QUEUE, BLK_BOUNDS)
CLASS(${ftn}$) :: SELF
${ft.type}$, POINTER, INTENT(INOUT) :: PPTR(${ft.shape}$)
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE

CALL SELF%GET_DEVICE_DATA (IOR (NRD, NWR), PPTR, QUEUE)
INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: BLK_BOUNDS(2)

CALL SELF%GET_DEVICE_DATA (IOR (NRD, NWR), PPTR, QUEUE, BLK_BOUNDS=BLK_BOUNDS)

END SUBROUTINE ${ftn}$_GET_DEVICE_DATA_RDWR

Expand Down
26 changes: 26 additions & 0 deletions field_data_module.fypp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#! (C) Copyright 2022- ECMWF.
#! (C) Copyright 2022- Meteo-France.
#!
#! This software is licensed under the terms of the Apache Licence Version 2.0
#! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#! In applying this licence, ECMWF does not waive the privileges and immunities
#! granted to it by virtue of its status as an intergovernmental organisation
#! nor does it submit to any jurisdiction.

MODULE FIELD_DATA_MODULE

#:set fieldTypeList = fieldType.getFieldTypeList()
#:for ft in fieldTypeList
USE FIELD_${ft.rank}$${ft.suffix}$_DATA_MODULE, ONLY : ${ft.name}$_COPY_ARRAY
#:endfor

IMPLICIT NONE

INTERFACE COPY_ARRAY
#:for ft in fieldTypeList
MODULE PROCEDURE ${ft.name}$_COPY_ARRAY
#:endfor

END INTERFACE

END MODULE
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ list(APPEND TEST_FILES
test_field1d.F90
test_field_array.F90
test_field_delete_on_null.F90
test_get_device_data_bounds.F90
test_get_device_data_wronly.F90
test_get_device_data_non_contiguous.F90
test_host_mem_pool.F90
Expand Down
69 changes: 69 additions & 0 deletions tests/test_get_device_data_bounds.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
! (C) Copyright 2022- ECMWF.
! (C) Copyright 2022- Meteo-France.
!
! This software is licensed under the terms of the Apache Licence Version 2.0
! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
! In applying this licence, ECMWF does not waive the privileges and immunities
! granted to it by virtue of its status as an intergovernmental organisation
! nor does it submit to any jurisdiction.

PROGRAM TEST_GET_DEVICE_DATA_BOUNDS

USE FIELD_MODULE
USE FIELD_FACTORY_MODULE
USE PARKIND1
USE FIELD_ABORT_MODULE
IMPLICIT NONE

CLASS(FIELD_2RB), POINTER :: F_PTR => NULL()
REAL(KIND=JPRB), POINTER :: PTR_CPU(:,:)
REAL(KIND=JPRB), POINTER :: PTR_GPU(:,:)
LOGICAL :: OKAY
INTEGER :: I,J

CALL FIELD_NEW(F_PTR, LBOUNDS=[1,1], UBOUNDS=[128,3], PERSISTENT=.TRUE.)
CALL F_PTR%GET_HOST_DATA_RDWR(PTR_CPU)
PTR_CPU(:,1) = 42
PTR_CPU(:,2) = 42
PTR_CPU(:,3) = 37

CALL F_PTR%GET_DEVICE_DATA_RDWR(PTR_GPU, BLK_BOUNDS=[1,2])
OKAY=.TRUE.

!$acc serial, present(PTR_GPU), copy(OKAY)
DO I=1,128
DO J = 1,2
IF ( PTR_GPU(I,J) /= 42 ) THEN
OKAY =.FALSE.
END IF
END DO
END DO

IF (OKAY == .TRUE.) THEN
DO I=1,128
DO J = 1,2
PTR_GPU(I,J) = 32
END DO
END DO
END IF
!$acc end serial

IF (.NOT. OKAY) THEN
CALL FIELD_ABORT("ERROR DATA NOT UPDATED ON DEVICE")
END IF

CALL F_PTR%SYNC_HOST_RDWR()
DO I=1,128
DO J = 1,2
IF ( PTR_CPU(I,J) /= 32 ) THEN
OKAY =.FALSE.
END IF
END DO
END DO

IF (.NOT. OKAY) THEN
CALL FIELD_ABORT("ERROR HOST DATA NOT UPDATED BY SYNC_HOST_RDWR")
END IF

END PROGRAM TEST_GET_DEVICE_DATA_BOUNDS

0 comments on commit aef8344

Please sign in to comment.