From 74e542bb2a550e90812f6f7ec203c4d90d178a4e Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Fri, 1 Mar 2024 14:10:54 -0800 Subject: [PATCH 01/12] src: Begin adding ibput/ibget API --- src/data_c.c4 | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/data_c.c4 b/src/data_c.c4 index 2083fb0b..48bb1b4f 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -163,6 +163,26 @@ define(`SHMEM_PROF_DEF_CTX_IPUT_N', #define shmem_ctx_iput$1 pshmem_ctx_iput$1')dnl SHMEM_DEFINE_FOR_SIZES(`SHMEM_PROF_DEF_CTX_IPUT_N') +define(`SHMEM_PROF_DEF_IBPUT', +`#pragma weak shmemx_$1_ibput = pshmemx_$1_ibput +#define shmemx_$1_ibput pshmemx_$1_ibput')dnl +SHMEM_DEFINE_FOR_RMA(`SHMEM_PROF_DEF_IBPUT') + +define(`SHMEM_PROF_DEF_CTX_IBPUT', +`#pragma weak shmemx_ctx_$1_ibput = pshmemx_ctx_$1_ibput +#define shmemx_ctx_$1_ibput pshmemx_ctx_$1_ibput')dnl +SHMEM_DEFINE_FOR_RMA(`SHMEM_PROF_DEF_CTX_IBPUT') + +define(`SHMEM_PROF_DEF_IBPUT_N', +`#pragma weak shmemx_ibput$1 = pshmemx_ibput$1 +#define shmemx_ibput$1 pshmemx_ibput$1')dnl +SHMEM_DEFINE_FOR_SIZES(`SHMEM_PROF_DEF_IBPUT_N') + +define(`SHMEM_PROF_DEF_CTX_IBPUT_N', +`#pragma weak shmemx_ctx_ibput$1 = pshmemx_ctx_ibput$1 +#define shmemx_ctx_ibput$1 pshmemx_ctx_ibput$1')dnl +SHMEM_DEFINE_FOR_SIZES(`SHMEM_PROF_DEF_CTX_IBPUT_N') + define(`SHMEM_PROF_DEF_IGET', `#pragma weak shmem_$1_iget = pshmem_$1_iget #define shmem_$1_iget pshmem_$1_iget')dnl @@ -439,6 +459,31 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') } \ } +#define SHMEM_DEF_IBPUT(STYPE,TYPE) \ + void SHMEM_FUNCTION_ATTRIBUTES \ + SHMEM_FUNC_PROTOTYPE(STYPE##_ibput, TYPE *target, \ + const TYPE *source, ptrdiff_t tst, \ + ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_PE(pe); \ + SHMEM_ERR_CHECK_CTX(ctx); \ + SHMEM_ERR_CHECK_POSITIVE(tst); \ + SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * ((nblocks-1) * tst + 1)); \ + SHMEM_ERR_CHECK_NULL(source, nblocks); \ + SHMEM_ERR_CHECK_OVERLAP(target, source, \ + sizeof(TYPE) * ((nblocks-1) * tst + 1), \ + sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ + size_t i = 0; \ + for ( ; nblocks > 0 ; --nblocks) { \ + for (i = 0; i < bsize; i++) { \ + shmem_internal_put_scalar(ctx, target++, source++, \ + sizeof(TYPE), pe); \ + } \ + target += (tst-bsize); \ + source += (sst-bsize); \ + } \ + } #define SHMEM_DEF_IPUT_N(NAME,SIZE) \ void SHMEM_FUNCTION_ATTRIBUTES \ @@ -464,6 +509,34 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') } \ } +#define SHMEM_DEF_IBPUT_N(NAME,SIZE) \ + void SHMEM_FUNCTION_ATTRIBUTES \ + SHMEM_FUNC_PROTOTYPE(ibput##NAME, void *target, \ + const void *source, ptrdiff_t tst, \ + ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_PE(pe); \ + SHMEM_ERR_CHECK_CTX(ctx); \ + SHMEM_ERR_CHECK_POSITIVE(tst); \ + SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_SYMMETRIC(target, \ + (SIZE) * ((nblocks-1) * tst + 1)); \ + SHMEM_ERR_CHECK_NULL(source, nelems); \ + SHMEM_ERR_CHECK_OVERLAP(target, source, \ + (SIZE) * ((nblocks-1) * tst + 1), \ + (SIZE) * ((nblocks-1) * sst + 1), 0); \ + size_t i = 0; \ + for ( ; nblocks > 0 ; --nblocks) { \ + for (i = 0; i < bsize; i++) { \ + shmem_internal_put_scalar(ctx, target, source, (SIZE), \ + pe); \ + target = (uint8_t*)target + SIZE; \ + source = (uint8_t*)source + SIZE; \ + } \ + target = (uint8_t*)target + (tst-bsize)*(SIZE); \ + source = (uint8_t*)source + (sst-bsize)*(SIZE); \ + } \ + } #define SHMEM_DEF_IGET(STYPE,TYPE) \ void SHMEM_FUNCTION_ATTRIBUTES \ @@ -639,6 +712,8 @@ SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_GET_N_NBI') SHMEM_DEF_GET_N_NBI(`mem', `1') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IPUT') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IPUT_N') +SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IBPUT') +SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IBPUT_N') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IGET') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IGET_N') @@ -676,6 +751,8 @@ SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_GET_N_NBI') SHMEM_DEF_GET_N_NBI(`mem', `1') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IPUT') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IPUT_N') +SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IBPUT') +SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IBPUT_N') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IGET') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IGET_N') From 0e31f800db00cce15c523b4239aadbf0e37b0027 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Mon, 4 Mar 2024 08:54:55 -0800 Subject: [PATCH 02/12] Add IBGET implementations --- mpp/shmemx_c_func.h4 | 49 ++++++++++++++++++++++++++ src/data_c.c4 | 84 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/mpp/shmemx_c_func.h4 b/mpp/shmemx_c_func.h4 index e1e28336..16ebb602 100644 --- a/mpp/shmemx_c_func.h4 +++ b/mpp/shmemx_c_func.h4 @@ -27,6 +27,55 @@ SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_ct_wait(shmemx_ct_t ct, long wait_f SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_register_gettid(uint64_t (*gettid_fn)(void)); +/* Block-Strided RMA Routines */ +define(`SHMEM_C_IBPUT', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_$1_ibput($2 *target, const $2 *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, +SH_PAD(`$1') size_t bsize, size_t nblocks, int pe)')dnl +SHMEM_DECLARE_FOR_RMA(`SHMEM_C_IBPUT') + +define(`SHMEM_C_CTX_IBPUT', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_ctx_$1_ibput(shmem_ctx_t ctx, $2 *target, const $2 *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, +SH_PAD(`$1') size_t bsize, size_t nblocks, int pe)')dnl +SHMEM_DECLARE_FOR_RMA(`SHMEM_C_CTX_IBPUT') + +define(`SHMEM_C_IBPUT_N', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_ibput$1(void *target, const void *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, size_t bsize, size_t nblocks, +SH_PAD(`$1') int pe)')dnl +SHMEM_DECLARE_FOR_SIZES(`SHMEM_C_IBPUT_N') + +define(`SHMEM_C_CTX_IBPUT_N', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_ctx_ibput$1(shmem_ctx_t ctx, void *target, const void *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, size_t bsize, size_t nblocks, +SH_PAD(`$1') int pe)')dnl +SHMEM_DECLARE_FOR_SIZES(`SHMEM_C_CTX_IBPUT_N') + +define(`SHMEM_C_IBGET', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_$1_ibget($2 *target, const $2 *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, +SH_PAD(`$1') size_t bsize, size_t nblocks, int pe)')dnl +SHMEM_DECLARE_FOR_RMA(`SHMEM_C_IBGET') + +define(`SHMEM_C_CTX_IBGET', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_ctx_$1_ibget(shmem_ctx_t ctx, $2 *target, const $2 *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, +SH_PAD(`$1') size_t bsize, size_t nblocks, int pe)')dnl +SHMEM_DECLARE_FOR_RMA(`SHMEM_C_CTX_IBGET') + +define(`SHMEM_C_IBGET_N', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_ibget$1(void* target, const void *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, +SH_PAD(`$1') size_t bsize, size_t nblocks, int pe)')dnl +SHMEM_DECLARE_FOR_SIZES(`SHMEM_C_IBGET_N') + +define(`SHMEM_C_CTX_IBGET_N', +`SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_ctx_ibget$1(shmem_ctx_t ctx, void* target, const void *source, +SH_PAD(`$1') ptrdiff_t tst, ptrdiff_t sst, +SH_PAD(`$1') size_t bsize, size_t nblocks, int pe)')dnl +SHMEM_DECLARE_FOR_SIZES(`SHMEM_C_CTX_IBGET_N') + /* Performance Counter Query Routines */ SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_pcntr_get_issued_write(shmem_ctx_t ctx, uint64_t *cntr_value); SHMEM_FUNCTION_ATTRIBUTES void SHPRE()shmemx_pcntr_get_issued_read(shmem_ctx_t ctx, uint64_t *cntr_value); diff --git a/src/data_c.c4 b/src/data_c.c4 index 48bb1b4f..e308247b 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -203,6 +203,26 @@ define(`SHMEM_PROF_DEF_CTX_IGET_N', #define shmem_ctx_iget$1 pshmem_ctx_iget$1')dnl SHMEM_DEFINE_FOR_SIZES(`SHMEM_PROF_DEF_CTX_IGET_N') +define(`SHMEM_PROF_DEF_IBGET', +`#pragma weak shmemx_$1_ibget = pshmemx_$1_ibget +#define shmemx_$1_ibget pshmemx_$1_ibget')dnl +SHMEM_DEFINE_FOR_RMA(`SHMEM_PROF_DEF_IBGET') + +define(`SHMEM_PROF_DEF_CTX_IBGET', +`#pragma weak shmemx_ctx_$1_ibget = pshmemx_ctx_$1_ibget +#define shmemx_ctx_$1_ibget pshmemx_ctx_$1_ibget')dnl +SHMEM_DEFINE_FOR_RMA(`SHMEM_PROF_DEF_CTX_IBGET') + +define(`SHMEM_PROF_DEF_IBGET_N', +`#pragma weak shmemx_ibget$1 = pshmemx_ibget$1 +#define shmemx_ibget$1 pshmemx_ibget$1')dnl +SHMEM_DEFINE_FOR_SIZES(`SHMEM_PROF_DEF_IBGET_N') + +define(`SHMEM_PROF_DEF_CTX_IBGET_N', +`#pragma weak shmemx_ctx_ibget$1 = pshmemx_ctx_ibget$1 +#define shmemx_ctx_ibget$1 pshmemx_ctx_ibget$1')dnl +SHMEM_DEFINE_FOR_SIZES(`SHMEM_PROF_DEF_CTX_IBGET_N') + /* Blocking put with signal */ define(`SHMEM_PROF_DEF_PUT_SIGNAL', `#pragma weak shmem_$1_put_signal = pshmem_$1_put_signal @@ -521,7 +541,7 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_POSITIVE(sst); \ SHMEM_ERR_CHECK_SYMMETRIC(target, \ (SIZE) * ((nblocks-1) * tst + 1)); \ - SHMEM_ERR_CHECK_NULL(source, nelems); \ + SHMEM_ERR_CHECK_NULL(source, nblocks); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ (SIZE) * ((nblocks-1) * tst + 1), \ (SIZE) * ((nblocks-1) * sst + 1), 0); \ @@ -563,6 +583,34 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') shmem_internal_get_wait(ctx); \ } +#define SHMEM_DEF_IBGET(STYPE,TYPE) \ + void SHMEM_FUNCTION_ATTRIBUTES \ + SHMEM_FUNC_PROTOTYPE(STYPE##_ibget, TYPE *target, \ + const TYPE *source, \ + ptrdiff_t tst, ptrdiff_t sst, \ + size_t bsize, size_t nblocks, int pe) \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_PE(pe); \ + SHMEM_ERR_CHECK_CTX(ctx); \ + SHMEM_ERR_CHECK_POSITIVE(tst); \ + SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE) * ((nblocks-1) * sst + 1)); \ + SHMEM_ERR_CHECK_NULL(target, nblocks); \ + SHMEM_ERR_CHECK_OVERLAP(target, source, \ + sizeof(TYPE) * ((nblocks-1) * tst + 1), \ + sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ + size_t i = 0; \ + for ( ; nblocks > 0 ; --nblocks) { \ + for (i = 0; i < bsize; i++) { \ + shmem_internal_get(ctx, target++, source++, sizeof(TYPE), \ + pe); \ + } \ + target += (tst-bsize); \ + source += (sst-bsize); \ + } \ + shmem_internal_get_wait(ctx); \ + } + #define SHMEM_DEF_IGET_N(NAME,SIZE) \ void SHMEM_FUNCTION_ATTRIBUTES \ SHMEM_FUNC_PROTOTYPE(iget##NAME, void *target, \ @@ -588,6 +636,36 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') shmem_internal_get_wait(ctx); \ } +#define SHMEM_DEF_IBGET_N(NAME,SIZE) \ + void SHMEM_FUNCTION_ATTRIBUTES \ + SHMEM_FUNC_PROTOTYPE(ibget##NAME, void *target, \ + const void *source, ptrdiff_t tst, \ + ptrdiff_t sst, size_t bsize, \ + size_t nblocks, int pe) \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_PE(pe); \ + SHMEM_ERR_CHECK_CTX(ctx); \ + SHMEM_ERR_CHECK_POSITIVE(tst); \ + SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_SYMMETRIC(source, \ + (SIZE) * ((nblocks-1) * sst + 1)); \ + SHMEM_ERR_CHECK_NULL(target, nblocks); \ + SHMEM_ERR_CHECK_OVERLAP(target, source, \ + (SIZE) * ((nblocks-1) * tst + 1), \ + (SIZE) * ((nblocks-1) * sst + 1), 0); \ + size_t i = 0; \ + for ( ; nblocks > 0 ; --nblocks) { \ + for (i = 0; i < bsize; i++) { \ + shmem_internal_get(ctx, target, source, (SIZE), pe);\ + target = (uint8_t*)target + SIZE; \ + source = (uint8_t*)source + SIZE; \ + } \ + target = (uint8_t*)target + (tst-bsize)*(SIZE); \ + source = (uint8_t*)source + (sst-bsize)*(SIZE); \ + } \ + shmem_internal_get_wait(ctx); \ + } + #define SHMEM_DEF_PUT_SIGNAL(STYPE,TYPE) \ void SHMEM_FUNCTION_ATTRIBUTES \ SHMEM_FUNC_PROTOTYPE(STYPE##_put_signal, TYPE *target, \ @@ -716,6 +794,8 @@ SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IBPUT') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IBPUT_N') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IGET') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IGET_N') +SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IBGET') +SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IBGET_N') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_PUT_SIGNAL') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_PUT_N_SIGNAL') @@ -755,6 +835,8 @@ SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IBPUT') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IBPUT_N') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IGET') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IGET_N') +SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_IBGET') +SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_IBGET_N') SHMEM_DEFINE_FOR_RMA(`SHMEM_DEF_PUT_SIGNAL') SHMEM_DEFINE_FOR_SIZES(`SHMEM_DEF_PUT_N_SIGNAL') From 0676f43b3f9051383a027e251bbd657242f14471 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Mon, 4 Mar 2024 11:59:22 -0800 Subject: [PATCH 03/12] Fixed 'aliasing undefined symbol' issue --- src/data_c.c4 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/data_c.c4 b/src/data_c.c4 index e308247b..1d605216 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -481,7 +481,7 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') #define SHMEM_DEF_IBPUT(STYPE,TYPE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEM_FUNC_PROTOTYPE(STYPE##_ibput, TYPE *target, \ + SHMEMX_FUNC_PROTOTYPE(STYPE##_ibput, TYPE *target, \ const TYPE *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ SHMEM_ERR_CHECK_INITIALIZED(); \ @@ -531,7 +531,7 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') #define SHMEM_DEF_IBPUT_N(NAME,SIZE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEM_FUNC_PROTOTYPE(ibput##NAME, void *target, \ + SHMEMX_FUNC_PROTOTYPE(ibput##NAME, void *target, \ const void *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ SHMEM_ERR_CHECK_INITIALIZED(); \ @@ -585,7 +585,7 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') #define SHMEM_DEF_IBGET(STYPE,TYPE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEM_FUNC_PROTOTYPE(STYPE##_ibget, TYPE *target, \ + SHMEMX_FUNC_PROTOTYPE(STYPE##_ibget, TYPE *target, \ const TYPE *source, \ ptrdiff_t tst, ptrdiff_t sst, \ size_t bsize, size_t nblocks, int pe) \ @@ -638,7 +638,7 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') #define SHMEM_DEF_IBGET_N(NAME,SIZE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEM_FUNC_PROTOTYPE(ibget##NAME, void *target, \ + SHMEMX_FUNC_PROTOTYPE(ibget##NAME, void *target, \ const void *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, \ size_t nblocks, int pe) \ From 5a502553383df1fb4a9b6f6471770b3cdc069e77 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Mon, 4 Mar 2024 12:58:33 -0800 Subject: [PATCH 04/12] src: Add additional error-checking to IPUT/IGET APIs --- src/data_c.c4 | 14 +++++++++++--- src/shmem_internal.h | 10 +++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/data_c.c4 b/src/data_c.c4 index 1d605216..29bc6ab4 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -490,7 +490,9 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_POSITIVE(tst); \ SHMEM_ERR_CHECK_POSITIVE(sst); \ SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * ((nblocks-1) * tst + 1)); \ - SHMEM_ERR_CHECK_NULL(source, nblocks); \ + SHMEM_ERR_CHECK_NULL(source, nblocks); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ sizeof(TYPE) * ((nblocks-1) * tst + 1), \ sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ @@ -542,6 +544,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, \ (SIZE) * ((nblocks-1) * tst + 1)); \ SHMEM_ERR_CHECK_NULL(source, nblocks); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ (SIZE) * ((nblocks-1) * tst + 1), \ (SIZE) * ((nblocks-1) * sst + 1), 0); \ @@ -595,7 +599,9 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_POSITIVE(tst); \ SHMEM_ERR_CHECK_POSITIVE(sst); \ SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE) * ((nblocks-1) * sst + 1)); \ - SHMEM_ERR_CHECK_NULL(target, nblocks); \ + SHMEM_ERR_CHECK_NULL(target, nblocks); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ sizeof(TYPE) * ((nblocks-1) * tst + 1), \ sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ @@ -649,7 +655,9 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_POSITIVE(sst); \ SHMEM_ERR_CHECK_SYMMETRIC(source, \ (SIZE) * ((nblocks-1) * sst + 1)); \ - SHMEM_ERR_CHECK_NULL(target, nblocks); \ + SHMEM_ERR_CHECK_NULL(target, nblocks); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ + SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ (SIZE) * ((nblocks-1) * tst + 1), \ (SIZE) * ((nblocks-1) * sst + 1), 0); \ diff --git a/src/shmem_internal.h b/src/shmem_internal.h index 5befc852..7daa214b 100644 --- a/src/shmem_internal.h +++ b/src/shmem_internal.h @@ -347,6 +347,13 @@ extern hwloc_topology_t shmem_internal_topology; #op, (int) (op)); \ } \ } while (0) +#define SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(stride, bsize) \ + do { \ + if (stride < bsize) { \ + RAISE_ERROR_MSG("Stride argument \"%s\" (%zu), is less than block size %zu\n", \ + #stride, stride, bsize); + } \ + } while(0) #else #define SHMEM_ERR_CHECK_INITIALIZED() @@ -361,7 +368,8 @@ extern hwloc_topology_t shmem_internal_topology; #define SHMEM_ERR_CHECK_OVERLAP(ptr1, ptr2, size1, size2, complete_overlap_allowed) #define SHMEM_ERR_CHECK_NULL(ptr, nelems) #define SHMEM_ERR_CHECK_CMP_OP(op) -#define SHMEM_ERR_CHECK_SIG_OP(op) \ +#define SHMEM_ERR_CHECK_SIG_OP(op) +#define SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(stride, bsize) #endif /* ENABLE_ERROR_CHECKING */ From 820b08bd5219feeb77e50e8482101f0936da4538 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Tue, 5 Mar 2024 07:44:33 -0800 Subject: [PATCH 05/12] Add generic IBPUT/IBGET to shmemx.h --- mpp/shmemx.h4 | 48 ++++++++++++++++++++++++++++++++++++++++++++ src/shmem_internal.h | 3 ++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/mpp/shmemx.h4 b/mpp/shmemx.h4 index 693be25c..dbc9ec9a 100644 --- a/mpp/shmemx.h4 +++ b/mpp/shmemx.h4 @@ -42,6 +42,54 @@ include(shmemx_c_func.h4)dnl #ifdef __cplusplus } /* extern "C" */ +define(`SHMEM_CXX_IBPUT', +`static inline void shmemx_ibput($2 *target, const $2 *source, + ptrdiff_t tst, ptrdiff_t sst, + size_t bsize, size_t nblocks, int pe) { + shmemx_$1_ibput(target, source, tst, sst, bsize, nblocks, pe); +} +static inline void shmemx_ibput(shmem_ctx_t ctx, $2 *target, const $2 *source, + ptrdiff_t tst, ptrdiff_t sst, + size_t bsize, size_t nblocks, int pe) { + shmemx_ctx_$1_ibput(ctx, target, source, tst, sst, bsize, nblocks, pe); +}')dnl +SHMEM_CXX_DEFINE_FOR_RMA(`SHMEM_CXX_IBPUT') + +define(`SHMEM_CXX_IBGET', +`static inline void shmemx_ibget($2 *target, const $2 *source, + ptrdiff_t tst, ptrdiff_t sst, + size_t bsize, size_t nblocks, int pe) { + shmemx_$1_ibget(target, source, tst, sst, bsize, nblocks, pe); +} +static inline void shmemx_ibget(shmem_ctx_t ctx, $2 *target, const $2 *source, + ptrdiff_t tst, ptrdiff_t sst, + size_t bsize, size_t nblocks, int pe) { + shmemx_ctx_$1_ibget(ctx, target, source, tst, sst, bsize, nblocks, pe); +}')dnl +SHMEM_CXX_DEFINE_FOR_RMA(`SHMEM_CXX_IBGET') + +define(`SHMEM_C11_GEN_IBPUT', ` $2*: shmemx_$1_ibput')dnl +define(`SHMEM_CTX_C11_GEN_IBPUT', ` $2*: shmemx_ctx_$1_ibput')dnl +#define shmemx_ibput(...) \ + _Generic(SHMEM_C11_TYPE_EVAL_PTR(SHMEM_C11_ARG0(__VA_ARGS__)), \ + shmem_ctx_t: \ + _Generic(SHMEM_C11_TYPE_EVAL_PTR(SHMEM_C11_ARG1(__VA_ARGS__)), \ +SHMEM_BIND_C11_RMA(`SHMEM_CTX_C11_GEN_IBPUT', `, \') \ + ), \ +SHMEM_BIND_C11_RMA(`SHMEM_C11_GEN_IBPUT', `, \') \ + )(__VA_ARGS__) + +define(`SHMEM_C11_GEN_IBGET', ` $2*: shmemx_$1_ibget')dnl +define(`SHMEM_CTX_C11_GEN_IBGET', ` $2*: shmemx_ctx_$1_ibget')dnl +#define shmemx_ibget(...) \ + _Generic(SHMEM_C11_TYPE_EVAL_PTR(SHMEM_C11_ARG0(__VA_ARGS__)), \ + shmem_ctx_t: \ + _Generic(SHMEM_C11_TYPE_EVAL_PTR(SHMEM_C11_ARG1(__VA_ARGS__)), \ +SHMEM_BIND_C11_RMA(`SHMEM_CTX_C11_GEN_IBGET', `, \') \ + ), \ +SHMEM_BIND_C11_RMA(`SHMEM_C11_GEN_IBGET', `, \') \ + )(__VA_ARGS__) + /* C11 Generic Macros */ #elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(SHMEM_INTERNAL_INCLUDE)) diff --git a/src/shmem_internal.h b/src/shmem_internal.h index 7daa214b..5785295c 100644 --- a/src/shmem_internal.h +++ b/src/shmem_internal.h @@ -347,11 +347,12 @@ extern hwloc_topology_t shmem_internal_topology; #op, (int) (op)); \ } \ } while (0) + #define SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(stride, bsize) \ do { \ if (stride < bsize) { \ RAISE_ERROR_MSG("Stride argument \"%s\" (%zu), is less than block size %zu\n", \ - #stride, stride, bsize); + #stride, stride, bsize); \ } \ } while(0) From 07feeae5f3b54037fd5cb58195b5fae8cc8bd192 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Thu, 14 Mar 2024 10:54:06 -0700 Subject: [PATCH 06/12] mpp: Quick fix to shmemx.h4 --- mpp/shmemx.h4 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mpp/shmemx.h4 b/mpp/shmemx.h4 index dbc9ec9a..04bcfb91 100644 --- a/mpp/shmemx.h4 +++ b/mpp/shmemx.h4 @@ -68,6 +68,9 @@ static inline void shmemx_ibget(shmem_ctx_t ctx, $2 *target, const $2 *source, }')dnl SHMEM_CXX_DEFINE_FOR_RMA(`SHMEM_CXX_IBGET') +/* C11 Generic Macros */ +#elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(SHMEM_INTERNAL_INCLUDE)) + define(`SHMEM_C11_GEN_IBPUT', ` $2*: shmemx_$1_ibput')dnl define(`SHMEM_CTX_C11_GEN_IBPUT', ` $2*: shmemx_ctx_$1_ibput')dnl #define shmemx_ibput(...) \ @@ -90,9 +93,6 @@ SHMEM_BIND_C11_RMA(`SHMEM_CTX_C11_GEN_IBGET', `, \') \ SHMEM_BIND_C11_RMA(`SHMEM_C11_GEN_IBGET', `, \') \ )(__VA_ARGS__) -/* C11 Generic Macros */ -#elif (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(SHMEM_INTERNAL_INCLUDE)) - #endif /* C11 */ #endif /* SHMEMX_H */ From b2fec6ec07e728db05fd7d31fecd6a6627814800 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Tue, 26 Mar 2024 10:29:45 -0700 Subject: [PATCH 07/12] src: Add Wasi's feedback. Remove unnecessary bsize loop --- src/data_c.c4 | 91 ++++++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 51 deletions(-) diff --git a/src/data_c.c4 b/src/data_c.c4 index 29bc6ab4..bc3e568a 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -481,29 +481,27 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') #define SHMEM_DEF_IBPUT(STYPE,TYPE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEMX_FUNC_PROTOTYPE(STYPE##_ibput, TYPE *target, \ + SHMEMX_FUNC_PROTOTYPE(STYPE##_ibput, TYPE *target, \ const TYPE *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ + if (bsize == 0) return; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_POSITIVE(tst); \ - SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * ((nblocks-1) * tst + 1)); \ SHMEM_ERR_CHECK_NULL(source, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - sizeof(TYPE) * ((nblocks-1) * tst + 1), \ + sizeof(TYPE) * ((nblocks-1) * tst + 1), \ sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ size_t i = 0; \ for ( ; nblocks > 0 ; --nblocks) { \ - for (i = 0; i < bsize; i++) { \ - shmem_internal_put_scalar(ctx, target++, source++, \ - sizeof(TYPE), pe); \ - } \ - target += (tst-bsize); \ - source += (sst-bsize); \ + shmem_internal_put_scalar(ctx, target, source, \ + bsize * sizeof(TYPE), pe); \ + target += tst; \ + source += sst; \ } \ } @@ -531,34 +529,30 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') } \ } -#define SHMEM_DEF_IBPUT_N(NAME,SIZE) \ +#define SHMEM_DEF_IBPUT_N(NAME,SIZE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEMX_FUNC_PROTOTYPE(ibput##NAME, void *target, \ + SHMEMX_FUNC_PROTOTYPE(ibput##NAME, void *target, \ const void *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ + if (bsize == 0) return; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_POSITIVE(tst); \ - SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ SHMEM_ERR_CHECK_SYMMETRIC(target, \ (SIZE) * ((nblocks-1) * tst + 1)); \ - SHMEM_ERR_CHECK_NULL(source, nblocks); \ + SHMEM_ERR_CHECK_NULL(source, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - (SIZE) * ((nblocks-1) * tst + 1), \ + (SIZE) * ((nblocks-1) * tst + 1), \ (SIZE) * ((nblocks-1) * sst + 1), 0); \ size_t i = 0; \ - for ( ; nblocks > 0 ; --nblocks) { \ - for (i = 0; i < bsize; i++) { \ - shmem_internal_put_scalar(ctx, target, source, (SIZE), \ - pe); \ - target = (uint8_t*)target + SIZE; \ - source = (uint8_t*)source + SIZE; \ - } \ - target = (uint8_t*)target + (tst-bsize)*(SIZE); \ - source = (uint8_t*)source + (sst-bsize)*(SIZE); \ + for ( ; nblocks > 0 ; --nblocks) { \ + shmem_internal_put_scalar(ctx, target, source, \ + bsize * (SIZE), pe); \ + target = (uint8_t*)target + tst*(SIZE); \ + source = (uint8_t*)source + sst*(SIZE); \ } \ } @@ -587,32 +581,30 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') shmem_internal_get_wait(ctx); \ } -#define SHMEM_DEF_IBGET(STYPE,TYPE) \ +#define SHMEM_DEF_IBGET(STYPE,TYPE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEMX_FUNC_PROTOTYPE(STYPE##_ibget, TYPE *target, \ + SHMEMX_FUNC_PROTOTYPE(STYPE##_ibget, TYPE *target, \ const TYPE *source, \ ptrdiff_t tst, ptrdiff_t sst, \ - size_t bsize, size_t nblocks, int pe) \ + size_t bsize, size_t nblocks, int pe) \ + if (bsize == 0) return; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_POSITIVE(tst); \ - SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE) * ((nblocks-1) * sst + 1)); \ SHMEM_ERR_CHECK_NULL(target, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - sizeof(TYPE) * ((nblocks-1) * tst + 1), \ + sizeof(TYPE) * ((nblocks-1) * tst + 1), \ sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ size_t i = 0; \ - for ( ; nblocks > 0 ; --nblocks) { \ - for (i = 0; i < bsize; i++) { \ - shmem_internal_get(ctx, target++, source++, sizeof(TYPE), \ - pe); \ - } \ - target += (tst-bsize); \ - source += (sst-bsize); \ + for ( ; nblocks > 0 ; --nblocks) { \ + shmem_internal_get(ctx, target, source, \ + bsize * sizeof(TYPE), pe); \ + target += tst; \ + source += sst; \ } \ shmem_internal_get_wait(ctx); \ } @@ -644,32 +636,29 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') #define SHMEM_DEF_IBGET_N(NAME,SIZE) \ void SHMEM_FUNCTION_ATTRIBUTES \ - SHMEMX_FUNC_PROTOTYPE(ibget##NAME, void *target, \ + SHMEMX_FUNC_PROTOTYPE(ibget##NAME, void *target, \ const void *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, \ size_t nblocks, int pe) \ + if (bsize == 0) return; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_POSITIVE(tst); \ - SHMEM_ERR_CHECK_POSITIVE(sst); \ + SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ SHMEM_ERR_CHECK_SYMMETRIC(source, \ (SIZE) * ((nblocks-1) * sst + 1)); \ SHMEM_ERR_CHECK_NULL(target, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - (SIZE) * ((nblocks-1) * tst + 1), \ + (SIZE) * ((nblocks-1) * tst + 1), \ (SIZE) * ((nblocks-1) * sst + 1), 0); \ - size_t i = 0; \ - for ( ; nblocks > 0 ; --nblocks) { \ - for (i = 0; i < bsize; i++) { \ - shmem_internal_get(ctx, target, source, (SIZE), pe);\ - target = (uint8_t*)target + SIZE; \ - source = (uint8_t*)source + SIZE; \ - } \ - target = (uint8_t*)target + (tst-bsize)*(SIZE); \ - source = (uint8_t*)source + (sst-bsize)*(SIZE); \ + size_t i = 0; \ + for ( ; nblocks > 0 ; --nblocks) { \ + shmem_internal_get(ctx, target, source, \ + bsize * (SIZE), pe); \ + target = (uint8_t*)target + tst*(SIZE); \ + source = (uint8_t*)source + sst*(SIZE); \ } \ shmem_internal_get_wait(ctx); \ } From 042d52aa3c252a9f2660e713e8acb3b1cdf6eeb3 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Mon, 1 Apr 2024 11:07:55 -0700 Subject: [PATCH 08/12] src: Implement Dave's feedback --- src/data_c.c4 | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/data_c.c4 b/src/data_c.c4 index bc3e568a..3f8e687f 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -488,15 +488,13 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ - SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * ((nblocks-1) * tst + 1)); \ + SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * ((nblocks-1) * tst + bsize)); \ SHMEM_ERR_CHECK_NULL(source, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - sizeof(TYPE) * ((nblocks-1) * tst + 1), \ - sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ - size_t i = 0; \ + sizeof(TYPE) * ((nblocks-1) * tst + bsize), \ + sizeof(TYPE) * ((nblocks-1) * sst + bsize), 0); \ for ( ; nblocks > 0 ; --nblocks) { \ shmem_internal_put_scalar(ctx, target, source, \ bsize * sizeof(TYPE), pe); \ @@ -538,16 +536,14 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ SHMEM_ERR_CHECK_SYMMETRIC(target, \ - (SIZE) * ((nblocks-1) * tst + 1)); \ + (SIZE) * ((nblocks-1) * tst + bsize)); \ SHMEM_ERR_CHECK_NULL(source, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - (SIZE) * ((nblocks-1) * tst + 1), \ - (SIZE) * ((nblocks-1) * sst + 1), 0); \ - size_t i = 0; \ + (SIZE) * ((nblocks-1) * tst + bsize), \ + (SIZE) * ((nblocks-1) * sst + bsize), 0); \ for ( ; nblocks > 0 ; --nblocks) { \ shmem_internal_put_scalar(ctx, target, source, \ bsize * (SIZE), pe); \ @@ -591,15 +587,13 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ - SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE) * ((nblocks-1) * sst + 1)); \ + SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE) * ((nblocks-1) * sst + bsize)); \ SHMEM_ERR_CHECK_NULL(target, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - sizeof(TYPE) * ((nblocks-1) * tst + 1), \ - sizeof(TYPE) * ((nblocks-1) * sst + 1), 0); \ - size_t i = 0; \ + sizeof(TYPE) * ((nblocks-1) * tst + bsize), \ + sizeof(TYPE) * ((nblocks-1) * sst + bsize), 0); \ for ( ; nblocks > 0 ; --nblocks) { \ shmem_internal_get(ctx, target, source, \ bsize * sizeof(TYPE), pe); \ @@ -644,16 +638,14 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ - SHMEM_ERR_CHECK_NON_NEGATIVE(bsize); \ SHMEM_ERR_CHECK_SYMMETRIC(source, \ - (SIZE) * ((nblocks-1) * sst + 1)); \ + (SIZE) * ((nblocks-1) * sst + bsize)); \ SHMEM_ERR_CHECK_NULL(target, nblocks); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(tst, bsize); \ SHMEM_ERR_CHECK_STRIDE_GTE_BSIZE(sst, bsize); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ - (SIZE) * ((nblocks-1) * tst + 1), \ - (SIZE) * ((nblocks-1) * sst + 1), 0); \ - size_t i = 0; \ + (SIZE) * ((nblocks-1) * tst + bsize), \ + (SIZE) * ((nblocks-1) * sst + bsize), 0); \ for ( ; nblocks > 0 ; --nblocks) { \ shmem_internal_get(ctx, target, source, \ bsize * (SIZE), pe); \ From 2fb387f2aa0af8ebdca4ab14006c02a681715c22 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Tue, 2 Apr 2024 14:40:09 -0700 Subject: [PATCH 09/12] src: ibput/ibget: Use put_nb instead of put_scalar --- src/data_c.c4 | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/data_c.c4 b/src/data_c.c4 index 3f8e687f..b19ab75d 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -485,6 +485,7 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') const TYPE *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ if (bsize == 0) return; \ + long completion = 0; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ @@ -496,11 +497,13 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') sizeof(TYPE) * ((nblocks-1) * tst + bsize), \ sizeof(TYPE) * ((nblocks-1) * sst + bsize), 0); \ for ( ; nblocks > 0 ; --nblocks) { \ - shmem_internal_put_scalar(ctx, target, source, \ - bsize * sizeof(TYPE), pe); \ + shmem_internal_put_nb(ctx, target, source, \ + bsize * sizeof(TYPE), pe, \ + &completion); \ target += tst; \ source += sst; \ } \ + shmem_internal_put_wait(ctx, &completion); \ } #define SHMEM_DEF_IPUT_N(NAME,SIZE) \ @@ -533,6 +536,7 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') const void *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ if (bsize == 0) return; \ + long completion = 0; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ @@ -545,11 +549,13 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') (SIZE) * ((nblocks-1) * tst + bsize), \ (SIZE) * ((nblocks-1) * sst + bsize), 0); \ for ( ; nblocks > 0 ; --nblocks) { \ - shmem_internal_put_scalar(ctx, target, source, \ - bsize * (SIZE), pe); \ + shmem_internal_put_nb(ctx, target, source, \ + bsize * (SIZE), pe, \ + &completion); \ target = (uint8_t*)target + tst*(SIZE); \ source = (uint8_t*)source + sst*(SIZE); \ } \ + shmem_internal_put_wait(ctx, &completion); \ } #define SHMEM_DEF_IGET(STYPE,TYPE) \ From c12a8c334ee018ceed01c8230977d54d4e67e310 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Wed, 10 Apr 2024 11:36:37 -0700 Subject: [PATCH 10/12] src: Remove bsize==0 check --- src/data_c.c4 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/data_c.c4 b/src/data_c.c4 index b19ab75d..0dc28592 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -484,7 +484,6 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEMX_FUNC_PROTOTYPE(STYPE##_ibput, TYPE *target, \ const TYPE *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ - if (bsize == 0) return; \ long completion = 0; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ @@ -535,7 +534,6 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEMX_FUNC_PROTOTYPE(ibput##NAME, void *target, \ const void *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, size_t nblocks, int pe) \ - if (bsize == 0) return; \ long completion = 0; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ @@ -589,7 +587,6 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') const TYPE *source, \ ptrdiff_t tst, ptrdiff_t sst, \ size_t bsize, size_t nblocks, int pe) \ - if (bsize == 0) return; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ @@ -640,7 +637,6 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') const void *source, ptrdiff_t tst, \ ptrdiff_t sst, size_t bsize, \ size_t nblocks, int pe) \ - if (bsize == 0) return; \ SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_PE(pe); \ SHMEM_ERR_CHECK_CTX(ctx); \ From 8a8ea6dc12914317cd3f7310c9b992243d96ded7 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Wed, 17 Apr 2024 13:57:30 -0700 Subject: [PATCH 11/12] ci: Enable ability to use branches other than 'main' for submodules --- .github/workflows/ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b5a555d4..0a10a9ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -156,6 +156,7 @@ jobs: echo "OS_NAME=$(lsb_release -si)-$(ls_release -sr)" >> $GITHUB_ENV - uses: actions/checkout@v4 with: + fetch-depth: 0 submodules: 'true' - name: Install dependencies run: | @@ -217,6 +218,7 @@ jobs: # SOS - name: Build SOS (${{ matrix.sos_config }}) run: | + git submodule update --remote ./autogen.sh mkdir build; cd build ../configure --prefix=${SOS_INSTALL_DIR} --with-ofi=${LIBFABRIC_INSTALL_DIR} ${{ matrix.sos_config }} @@ -489,6 +491,7 @@ jobs: echo "OS_NAME=$(lsb_release -si)-$(ls_release -sr)" >> $GITHUB_ENV - uses: actions/checkout@v4 with: + fetch-depth: 0 submodules: 'true' - name: Install dependencies run: | @@ -541,6 +544,7 @@ jobs: # SOS - name: Build SOS (${{ matrix.sos_config }}) run: | + git submodule update --remote ./autogen.sh mkdir build; cd build ../configure --prefix=${SOS_INSTALL_DIR} --with-ucx=${UCX_INSTALL_DIR} ${{ matrix.sos_config }} @@ -581,6 +585,7 @@ jobs: echo "OS_NAME=$(lsb_release -si)-$(ls_release -sr)" >> $GITHUB_ENV - uses: actions/checkout@v4 with: + fetch-depth: 0 submodules: 'true' - name: Install dependencies run: | @@ -633,6 +638,7 @@ jobs: # SOS - name: Build SOS (${{ matrix.name }}) run: | + git submodule update --remote ./autogen.sh mkdir build; cd build ../configure --prefix=${SOS_INSTALL_DIR} --with-portals4=${PORTALS4_INSTALL_DIR} ${{ matrix.sos_config }} @@ -660,6 +666,7 @@ jobs: echo "OS_NAME=$(lsb_release -si)-$(ls_release -sr)" >> $GITHUB_ENV - uses: actions/checkout@v4 with: + fetch-depth: 0 submodules: 'true' - name: Install dependencies run: | @@ -688,6 +695,7 @@ jobs: # SOS - name: Build SOS (${{ matrix.name }}) run: | + git submodule update --remote ./autogen.sh mkdir build; cd build ../configure --prefix=${SOS_INSTALL_DIR} ${{ matrix.sos_config }} From d3cf4b6f94b946c8d8e7e1db4bcaec23461189a6 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Thu, 18 Apr 2024 14:49:01 -0700 Subject: [PATCH 12/12] src: Cleanup formatting inconsistencies --- src/data_c.c4 | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/data_c.c4 b/src/data_c.c4 index 0dc28592..7274941d 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -524,8 +524,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') for ( ; nelems > 0 ; --nelems) { \ shmem_internal_put_scalar(ctx, target, source, (SIZE), \ pe); \ - target = (uint8_t*)target + tst*(SIZE); \ - source = (uint8_t*)source + sst*(SIZE); \ + target = (uint8_t *) target + tst * (SIZE); \ + source = (uint8_t *) source + sst * (SIZE); \ } \ } @@ -550,8 +550,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') shmem_internal_put_nb(ctx, target, source, \ bsize * (SIZE), pe, \ &completion); \ - target = (uint8_t*)target + tst*(SIZE); \ - source = (uint8_t*)source + sst*(SIZE); \ + target = (uint8_t *) target + tst * (SIZE); \ + source = (uint8_t *) source + sst * (SIZE); \ } \ shmem_internal_put_wait(ctx, &completion); \ } @@ -625,8 +625,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') (SIZE) * ((nelems-1) * sst + 1), 0); \ for ( ; nelems > 0 ; --nelems) { \ shmem_internal_get(ctx, target, source, (SIZE), pe);\ - target = (uint8_t*)target + tst*(SIZE); \ - source = (uint8_t*)source + sst*(SIZE); \ + target = (uint8_t *) target + tst * (SIZE); \ + source = (uint8_t *) source + sst * (SIZE); \ } \ shmem_internal_get_wait(ctx); \ } @@ -651,8 +651,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') for ( ; nblocks > 0 ; --nblocks) { \ shmem_internal_get(ctx, target, source, \ bsize * (SIZE), pe); \ - target = (uint8_t*)target + tst*(SIZE); \ - source = (uint8_t*)source + sst*(SIZE); \ + target = (uint8_t *) target + tst * (SIZE); \ + source = (uint8_t *) source + sst * (SIZE); \ } \ shmem_internal_get_wait(ctx); \ }