From 941a87dde6e8024773944d3bf0d5619f58da6109 Mon Sep 17 00:00:00 2001 From: Philip Marshall Date: Fri, 12 Apr 2024 14:35:49 -0700 Subject: [PATCH] src: Skip overlap check for RMA operations if dest_pe != source_pe --- src/collectives_c.c4 | 36 +-- src/data_c.c4 | 48 ++-- src/shmem_internal.h | 34 +-- src/synchronization_c.c4 | 566 +++++++++++++++++++-------------------- 4 files changed, 351 insertions(+), 333 deletions(-) diff --git a/src/collectives_c.c4 b/src/collectives_c.c4 index d9b51bc1..70c8876b 100644 --- a/src/collectives_c.c4 +++ b/src/collectives_c.c4 @@ -226,7 +226,7 @@ shmem_team_sync(shmem_team_t team) SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long) * \ SHMEM_REDUCE_SYNC_SIZE); \ SHMEM_ERR_CHECK_OVERLAP(target, source, sizeof(TYPE)*nreduce, \ - sizeof(TYPE)*nreduce, 1); \ + sizeof(TYPE)*nreduce, 1, 1); \ \ shmem_internal_op_to_all(target, source, nreduce, sizeof(TYPE), \ PE_start, 1 << logPE_stride, PE_size, \ @@ -244,7 +244,7 @@ shmem_team_sync(shmem_team_t team) SHMEM_ERR_CHECK_SYMMETRIC(dest, sizeof(TYPE)*nreduce); \ SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE)*nreduce); \ SHMEM_ERR_CHECK_OVERLAP(dest, source, sizeof(TYPE)*nreduce, \ - sizeof(TYPE)*nreduce, 1); \ + sizeof(TYPE)*nreduce, 1, 1); \ TYPE *pWrk = NULL; \ \ shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; \ @@ -290,7 +290,7 @@ shmem_broadcast32(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(target, nlong * 4); SHMEM_ERR_CHECK_SYMMETRIC(source, nlong * 4); SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long)*SHMEM_BCAST_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 4, nlong * 4, 1); + SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 4, nlong * 4, 1, 1); shmem_internal_bcast(target, source, nlong * 4, PE_root, PE_start, 1 << logPE_stride, PE_size, @@ -309,7 +309,7 @@ shmem_broadcast64(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(target, nlong * 8); SHMEM_ERR_CHECK_SYMMETRIC(source, nlong * 8); 
SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long)*SHMEM_BCAST_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 8, nlong * 8, 1); + SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 8, nlong * 8, 1, 1); shmem_internal_bcast(target, source, nlong * 8, PE_root, PE_start, 1 << logPE_stride, PE_size, @@ -325,7 +325,7 @@ shmem_broadcastmem(shmem_team_t team, void *dest, const void *source, SHMEM_ERR_CHECK_TEAM_VALID(team); SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems); SHMEM_ERR_CHECK_SYMMETRIC(source, nelems); - SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1); + SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1, 1); shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; long *psync = shmem_internal_team_choose_psync(myteam, BCAST); @@ -351,7 +351,7 @@ shmem_broadcastmem(shmem_team_t team, void *dest, const void *source, SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_SYMMETRIC(source, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * sizeof(TYPE), \ - nelems * sizeof(TYPE), 1); \ + nelems * sizeof(TYPE), 1, 1); \ \ shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; \ long *psync = shmem_internal_team_choose_psync(myteam, BCAST); \ @@ -378,7 +378,7 @@ shmem_collect32(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(target, nlong * 4); SHMEM_ERR_CHECK_SYMMETRIC(source, nlong * 4); SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long) * SHMEM_COLLECT_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 4, nlong * 4, 1); + SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 4, nlong * 4, 1, 1); shmem_internal_collect(target, source, nlong * 4, PE_start, 1 << logPE_stride, PE_size, pSync); @@ -394,7 +394,7 @@ shmem_collect64(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(target, nlong * 8); SHMEM_ERR_CHECK_SYMMETRIC(source, nlong * 8); SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long) * SHMEM_COLLECT_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(target, 
source, nlong * 8, nlong * 8, 1); + SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 8, nlong * 8, 1, 1); shmem_internal_collect(target, source, nlong * 8, PE_start, 1 << logPE_stride, PE_size, pSync); @@ -410,7 +410,7 @@ shmem_collect64(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_SYMMETRIC(source, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * sizeof(TYPE), \ - nelems * sizeof(TYPE), 1); \ + nelems * sizeof(TYPE), 1, 1); \ \ shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; \ long *psync = shmem_internal_team_choose_psync(myteam, \ @@ -432,7 +432,7 @@ shmem_collectmem(shmem_team_t team, void *dest, const void *source, SHMEM_ERR_CHECK_TEAM_VALID(team); SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems); SHMEM_ERR_CHECK_SYMMETRIC(source, nelems); - SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1); + SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1, 1); shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; long *psync = shmem_internal_team_choose_psync(myteam, COLLECT); @@ -451,7 +451,7 @@ shmem_fcollect32(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(target, nlong * 4); SHMEM_ERR_CHECK_SYMMETRIC(source, nlong * 4); SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long) * SHMEM_COLLECT_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 4, nlong * 4, 1); + SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 4, nlong * 4, 1, 1); shmem_internal_fcollect(target, source, nlong * 4, PE_start, 1 << logPE_stride, PE_size, pSync); @@ -467,7 +467,7 @@ shmem_fcollect64(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(target, nlong * 8); SHMEM_ERR_CHECK_SYMMETRIC(source, nlong * 8); SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long) * SHMEM_COLLECT_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 8, nlong * 8, 1); + SHMEM_ERR_CHECK_OVERLAP(target, source, nlong * 8, nlong * 8, 1, 1); 
shmem_internal_fcollect(target, source, nlong * 8, PE_start, 1 << logPE_stride, PE_size, pSync); @@ -483,7 +483,7 @@ shmem_fcollect64(void *target, const void *source, size_t nlong, SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_SYMMETRIC(source, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * sizeof(TYPE), \ - nelems * sizeof(TYPE), 1); \ + nelems * sizeof(TYPE), 1, 1); \ \ shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; \ long *psync = shmem_internal_team_choose_psync(myteam, \ @@ -505,7 +505,7 @@ shmem_fcollectmem(shmem_team_t team, void *dest, const void *source, SHMEM_ERR_CHECK_TEAM_VALID(team); SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems); SHMEM_ERR_CHECK_SYMMETRIC(source, nelems); - SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1); + SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1, 1); shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; long *psync = shmem_internal_team_choose_psync(myteam, COLLECT); @@ -524,7 +524,7 @@ shmem_alltoall32(void *dest, const void *source, size_t nelems, int PE_start, SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems * 4); SHMEM_ERR_CHECK_SYMMETRIC(source, nelems * 4); SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long) * SHMEM_ALLTOALL_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * 4, nelems * 4, 1); + SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * 4, nelems * 4, 1, 1); shmem_internal_alltoall(dest, source, nelems * 4, PE_start, 1 << logPE_stride, PE_size, pSync); @@ -540,7 +540,7 @@ shmem_alltoall64(void *dest, const void *source, size_t nelems, int PE_start, SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems * 8); SHMEM_ERR_CHECK_SYMMETRIC(source, nelems * 8); SHMEM_ERR_CHECK_SYMMETRIC(pSync, sizeof(long) * SHMEM_ALLTOALL_SYNC_SIZE); - SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * 8, nelems * 8, 1); + SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * 8, nelems * 8, 1, 1); shmem_internal_alltoall(dest, source, nelems * 8, PE_start, 1 << logPE_stride, 
PE_size, pSync); @@ -556,7 +556,7 @@ shmem_alltoall64(void *dest, const void *source, size_t nelems, int PE_start, SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_SYMMETRIC(source, nelems * sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems * sizeof(TYPE), \ - nelems * sizeof(TYPE), 1); \ + nelems * sizeof(TYPE), 1, 1); \ \ shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; \ long *psync = shmem_internal_team_choose_psync(myteam, \ @@ -578,7 +578,7 @@ shmem_alltoallmem(shmem_team_t team, void *dest, const void *source, SHMEM_ERR_CHECK_TEAM_VALID(team); SHMEM_ERR_CHECK_SYMMETRIC(dest, nelems); SHMEM_ERR_CHECK_SYMMETRIC(source, nelems); - SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1); + SHMEM_ERR_CHECK_OVERLAP(dest, source, nelems, nelems, 1, 1); shmem_internal_team_t *myteam = (shmem_internal_team_t *)team; long *psync = shmem_internal_team_choose_psync(myteam, ALLTOALL); diff --git a/src/data_c.c4 b/src/data_c.c4 index 2083fb0b..00471fa8 100644 --- a/src/data_c.c4 +++ b/src/data_c.c4 @@ -287,7 +287,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, sizeof(TYPE) * \ - nelems, sizeof(TYPE) * nelems, 0); \ + nelems, sizeof(TYPE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_put_nb(ctx, target, source, \ sizeof(TYPE) * nelems, pe, \ &completion); \ @@ -307,7 +308,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, (SIZE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, (SIZE) * nelems, \ - (SIZE) * nelems, 0); \ + (SIZE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_put_nb(ctx, target, source, (SIZE) * nelems,\ pe, &completion); \ shmem_internal_put_wait(ctx, &completion); \ @@ -324,7 +326,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') 
SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, sizeof(TYPE) * \ - nelems, sizeof(TYPE) * nelems, 0); \ + nelems, sizeof(TYPE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_put_nbi(ctx, target, source, \ sizeof(TYPE)*nelems, \ pe); \ @@ -342,7 +345,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, (SIZE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, (SIZE) * nelems, \ - (SIZE) * nelems, 0); \ + (SIZE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_put_nbi(ctx, target, source, (SIZE)*nelems, \ pe); \ } @@ -359,7 +363,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE) * nelems); \ SHMEM_ERR_CHECK_NULL(target, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, sizeof(TYPE) * \ - nelems, sizeof(TYPE) * nelems, 0);\ + nelems, sizeof(TYPE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_get(ctx, target, source, \ sizeof(TYPE) * nelems, pe); \ shmem_internal_get_wait(ctx); \ @@ -377,7 +382,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(source, (SIZE) * nelems); \ SHMEM_ERR_CHECK_NULL(target, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, (SIZE)*nelems, \ - (SIZE) * nelems, 0); \ + (SIZE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_get(ctx, target, source, (SIZE)*nelems, \ pe); \ shmem_internal_get_wait(ctx); \ @@ -395,7 +401,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(source, sizeof(TYPE) * nelems); \ SHMEM_ERR_CHECK_NULL(target, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, sizeof(TYPE) * \ - nelems, sizeof(TYPE) * nelems, 0); \ + nelems, sizeof(TYPE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_get(ctx, target, source, sizeof(TYPE)*nelems, \ pe); \ } @@ -412,7 +419,8 @@ 
SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(source, (SIZE) * nelems); \ SHMEM_ERR_CHECK_NULL(target, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, (SIZE) * nelems, \ - (SIZE) * nelems, 0); \ + (SIZE) * nelems, 0, \ + (shmem_internal_my_pe == pe)); \ shmem_internal_get(ctx, target, source, (SIZE)*nelems, pe);\ } @@ -430,7 +438,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ sizeof(TYPE) * ((nelems-1) * tst + 1), \ - sizeof(TYPE) * ((nelems-1) * sst + 1), 0); \ + sizeof(TYPE) * ((nelems-1) * sst + 1), 0, \ + (shmem_internal_my_pe == pe)); \ for ( ; nelems > 0 ; --nelems) { \ shmem_internal_put_scalar(ctx, target, source, \ sizeof(TYPE), pe); \ @@ -455,7 +464,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ (SIZE) * ((nelems-1) * tst + 1), \ - (SIZE) * ((nelems-1) * sst + 1), 0); \ + (SIZE) * ((nelems-1) * sst + 1), 0, \ + (shmem_internal_my_pe == pe)); \ for ( ; nelems > 0 ; --nelems) { \ shmem_internal_put_scalar(ctx, target, source, (SIZE), \ pe); \ @@ -480,7 +490,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_NULL(target, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ sizeof(TYPE) * ((nelems-1) * tst + 1), \ - sizeof(TYPE) * ((nelems-1) * sst + 1), 0); \ + sizeof(TYPE) * ((nelems-1) * sst + 1), 0, \ + (shmem_internal_my_pe == pe)); \ for ( ; nelems > 0 ; --nelems) { \ shmem_internal_get(ctx, target, source, sizeof(TYPE), \ pe); \ @@ -506,7 +517,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_NULL(target, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, source, \ (SIZE) * ((nelems-1) * tst + 1), \ - (SIZE) * ((nelems-1) * sst + 1), 0); \ + (SIZE) * ((nelems-1) * sst + 1), 0, \ + (shmem_internal_my_pe == pe)); \ for ( ; nelems > 0 ; --nelems) { \ shmem_internal_get(ctx, target, source, (SIZE), pe);\ target = (uint8_t*)target + tst*(SIZE); \ @@ 
-528,7 +540,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, sig_addr, sizeof(TYPE) * nelems, \ - sizeof(uint64_t), 0); \ + sizeof(uint64_t), 0, \ + (shmem_internal_my_pe == pe)); \ SHMEM_ERR_CHECK_SIG_OP(sig_op); \ shmem_internal_put_nb(ctx, target, source, \ sizeof(TYPE) * nelems, pe, \ @@ -559,7 +572,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, (SIZE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, sig_addr, (SIZE) * nelems, \ - sizeof(uint64_t), 0); \ + sizeof(uint64_t), 0, \ + (shmem_internal_my_pe == pe)); \ SHMEM_ERR_CHECK_SIG_OP(sig_op); \ shmem_internal_put_nb(ctx, target, source, (SIZE) * nelems, \ pe, &completion); \ @@ -587,7 +601,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, sizeof(TYPE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, sig_addr, sizeof(TYPE) * nelems, \ - sizeof(uint64_t), 0); \ + sizeof(uint64_t), 0, \ + (shmem_internal_my_pe == pe)); \ SHMEM_ERR_CHECK_SIG_OP(sig_op); \ shmem_internal_put_signal_nbi(ctx, target, source, \ sizeof(TYPE) * nelems, sig_addr, \ @@ -607,7 +622,8 @@ SHMEM_PROF_DEF_CTX_PUT_N_SIGNAL_NBI(`mem') SHMEM_ERR_CHECK_SYMMETRIC(target, (SIZE) * nelems); \ SHMEM_ERR_CHECK_NULL(source, nelems); \ SHMEM_ERR_CHECK_OVERLAP(target, sig_addr, (SIZE) * nelems, \ - sizeof(uint64_t), 0); \ + sizeof(uint64_t), 0, \ + (shmem_internal_my_pe == pe)); \ SHMEM_ERR_CHECK_SIG_OP(sig_op); \ shmem_internal_put_signal_nbi(ctx, target, source, (SIZE) * nelems, \ sig_addr, signal, sig_op, pe); \ diff --git a/src/shmem_internal.h b/src/shmem_internal.h index 8db75be7..b469f5d6 100644 --- a/src/shmem_internal.h +++ b/src/shmem_internal.h @@ -316,21 +316,23 @@ extern hwloc_topology_t shmem_internal_topology; * size1 == size2 and complete_overlap_allowed is 1 * For the 
pt2pt sync routines (ivars/status/indices arguments): * size1 != size2 and complete_overlap_allowed is 0 */ -#define SHMEM_ERR_CHECK_OVERLAP(ptr1, ptr2, size1, size2, complete_overlap_allowed) \ - do { \ - const void *p1 = (void*)(ptr1); \ - const void *p2 = (void*)(ptr2); \ - const void *ptr_low = p1 > p2 ? p2 : p1; \ - const void *ptr_high = p1 > p2 ? p1 : p2; \ - const size_t sz_low = p1 > p2 ? size2 : size1; \ - const void *ptr_extent = (void *)((char *)ptr_low + sz_low); \ - if (complete_overlap_allowed && p1 == p2) { \ - break; /* Skip this check when buffer is allowed to completely overlap */ \ - } \ - if (ptr_extent > ptr_high) { \ - RAISE_ERROR_MSG("Argument \"%s\" [%p..%p) overlaps argument (%p)\n", #ptr1, \ - ptr_low, ptr_extent, ptr_high); \ - } \ +#define SHMEM_ERR_CHECK_OVERLAP(ptr1, ptr2, size1, size2, complete_overlap_allowed, precheck) \ + do { \ + if (precheck) { \ + const void *p1 = (void*)(ptr1); \ + const void *p2 = (void*)(ptr2); \ + const void *ptr_low = p1 > p2 ? p2 : p1; \ + const void *ptr_high = p1 > p2 ? p1 : p2; \ + const size_t sz_low = p1 > p2 ? 
size2 : size1; \ + const void *ptr_extent = (void *)((char *)ptr_low + sz_low); \ + if (complete_overlap_allowed && p1 == p2) { \ + break; /* Skip this check when buffer is allowed to completely overlap */ \ + } \ + if (ptr_extent > ptr_high) { \ + RAISE_ERROR_MSG("Argument \"%s\" [%p..%p) overlaps argument (%p)\n", #ptr1, \ + ptr_low, ptr_extent, ptr_high); \ + } \ + } \ } while (0) #define SHMEM_ERR_CHECK_NULL(ptr, nelems) \ @@ -378,7 +380,7 @@ extern hwloc_topology_t shmem_internal_topology; #define SHMEM_ERR_CHECK_CTX(ctx) #define SHMEM_ERR_CHECK_SYMMETRIC(ptr, len) #define SHMEM_ERR_CHECK_SYMMETRIC_HEAP(ptr) -#define SHMEM_ERR_CHECK_OVERLAP(ptr1, ptr2, size1, size2, complete_overlap_allowed) +#define SHMEM_ERR_CHECK_OVERLAP(ptr1, ptr2, size1, size2, complete_overlap_allowed, precheck) #define SHMEM_ERR_CHECK_NULL(ptr, nelems) #define SHMEM_ERR_CHECK_CMP_OP(op) #define SHMEM_ERR_CHECK_SIG_OP(op) \ diff --git a/src/synchronization_c.c4 b/src/synchronization_c.c4 index 966c6c85..6e3eef0f 100644 --- a/src/synchronization_c.c4 +++ b/src/synchronization_c.c4 @@ -214,172 +214,172 @@ SHMEM_BIND_C_WAIT(`SHMEM_DEF_WAIT') SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL') -#define SHMEM_DEF_WAIT_UNTIL_ALL(STYPE,TYPE) \ - void SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_wait_until_all(TYPE *vars, size_t nelems, \ - const int *status, int cond, TYPE value) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t i = 0, num_ignored = 0; \ - \ - if (status) { \ - for (i = 0; i < nelems; i++) { \ - if (status[i]) num_ignored++; \ - } \ - } \ - if (nelems == 0 || num_ignored == nelems) { \ - shmem_transport_probe(); \ - return; \ - } \ - \ - for (i = 0; i < nelems; i++) { \ - if (status == NULL || !status[i]) { \ - SHMEM_INTERNAL_WAIT_UNTIL(&vars[i], cond, value); \ - } \ - } \ - \ -
shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ +#define SHMEM_DEF_WAIT_UNTIL_ALL(STYPE,TYPE) \ + void SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_wait_until_all(TYPE *vars, size_t nelems, \ + const int *status, int cond, TYPE value) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t i = 0, num_ignored = 0; \ + \ + if (status) { \ + for (i = 0; i < nelems; i++) { \ + if (status[i]) num_ignored++; \ + } \ + } \ + if (nelems == 0 || num_ignored == nelems) { \ + shmem_transport_probe(); \ + return; \ + } \ + \ + for (i = 0; i < nelems; i++) { \ + if (status == NULL || !status[i]) { \ + SHMEM_INTERNAL_WAIT_UNTIL(&vars[i], cond, value); \ + } \ + } \ + \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ALL') -#define SHMEM_DEF_WAIT_UNTIL_ALL_VECTOR(STYPE,TYPE) \ - void SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_wait_until_all_vector(TYPE *vars, size_t nelems, \ - const int *status, int cond, TYPE *values) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t i = 0, num_ignored = 0; \ - \ - if (status) { \ - for (i = 0; i < nelems; i++) { \ - if(status[i]) num_ignored++; \ - } \ - } \ - \ - if (nelems == 0 || num_ignored == nelems) { \ - shmem_transport_probe(); \ - return; \ - } \ - \ - for (i = 0; i < nelems; i++) { \ - if (status == NULL || !status[i]) { \ - SHMEM_INTERNAL_WAIT_UNTIL(&vars[i], cond, values[i]); \ - } \ - } \ - \ - shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ +#define SHMEM_DEF_WAIT_UNTIL_ALL_VECTOR(STYPE,TYPE) \ + void SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_wait_until_all_vector(TYPE 
*vars, size_t nelems, \ + const int *status, int cond, TYPE *values) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t i = 0, num_ignored = 0; \ + \ + if (status) { \ + for (i = 0; i < nelems; i++) { \ + if(status[i]) num_ignored++; \ + } \ + } \ + \ + if (nelems == 0 || num_ignored == nelems) { \ + shmem_transport_probe(); \ + return; \ + } \ + \ + for (i = 0; i < nelems; i++) { \ + if (status == NULL || !status[i]) { \ + SHMEM_INTERNAL_WAIT_UNTIL(&vars[i], cond, values[i]); \ + } \ + } \ + \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ALL_VECTOR') -#define SHMEM_DEF_WAIT_UNTIL_ANY(STYPE,TYPE) \ - size_t SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_wait_until_any(TYPE *vars, size_t nelems, \ - const int *status, int cond, TYPE value) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t i = 0, found_idx = SIZE_MAX, num_ignored = 0; \ - int cmpret = 0; \ - \ - if (status) { \ - for (i = 0; i < nelems; i++) { \ - if (status[i]) num_ignored++; \ - } \ - } \ - if (nelems == 0 || num_ignored == nelems) { \ - shmem_transport_probe(); \ - return SIZE_MAX; \ - } \ - \ - SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ - size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ - (RAND_MAX + 1.0) * (double) nelems); \ - SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ - \ - while (!cmpret) { \ - for (i = 0; i < nelems; i++) { \ - size_t idx = (i + start_idx) % nelems; \ - if (status == NULL || !status[idx]) { \ - SHMEM_TEST(cond, &vars[idx], value, cmpret); \ - if (cmpret) { \ - found_idx = idx; \ - break; \ - } \ - } \ - } \ - if (!cmpret) 
shmem_transport_probe(); \ - } \ - \ - shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ - return found_idx; \ +#define SHMEM_DEF_WAIT_UNTIL_ANY(STYPE,TYPE) \ + size_t SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_wait_until_any(TYPE *vars, size_t nelems, \ + const int *status, int cond, TYPE value) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t i = 0, found_idx = SIZE_MAX, num_ignored = 0; \ + int cmpret = 0; \ + \ + if (status) { \ + for (i = 0; i < nelems; i++) { \ + if (status[i]) num_ignored++; \ + } \ + } \ + if (nelems == 0 || num_ignored == nelems) { \ + shmem_transport_probe(); \ + return SIZE_MAX; \ + } \ + \ + SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ + size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ + (RAND_MAX + 1.0) * (double) nelems); \ + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ + \ + while (!cmpret) { \ + for (i = 0; i < nelems; i++) { \ + size_t idx = (i + start_idx) % nelems; \ + if (status == NULL || !status[idx]) { \ + SHMEM_TEST(cond, &vars[idx], value, cmpret); \ + if (cmpret) { \ + found_idx = idx; \ + break; \ + } \ + } \ + } \ + if (!cmpret) shmem_transport_probe(); \ + } \ + \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ + return found_idx; \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ANY') -#define SHMEM_DEF_WAIT_UNTIL_ANY_VECTOR(STYPE,TYPE) \ - size_t SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_wait_until_any_vector(TYPE *vars, size_t nelems, \ - const int *status, int cond, TYPE *values) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t i = 0, found_idx = SIZE_MAX, num_ignored = 0; \ - int cmpret = 
0; \ - \ - if (status) { \ - for (i = 0; i < nelems; i++) { \ - if (status[i]) num_ignored++; \ - } \ - } \ - if (nelems == 0 || num_ignored == nelems) { \ - shmem_transport_probe(); \ - return SIZE_MAX; \ - } \ - \ - SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ - size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ - (RAND_MAX + 1.0) * (double) nelems); \ - SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ - \ - while (!cmpret) { \ - for (i = 0; i < nelems; i++) { \ - size_t idx = (i + start_idx) % nelems; \ - if (status == NULL || !status[idx]) { \ - SHMEM_TEST(cond, &vars[idx], values[idx], cmpret); \ - if (cmpret) { \ - found_idx = idx; \ - break; \ - } \ - } \ - } \ - if (!cmpret) shmem_transport_probe(); \ - } \ - \ - shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ - return found_idx; \ +#define SHMEM_DEF_WAIT_UNTIL_ANY_VECTOR(STYPE,TYPE) \ + size_t SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_wait_until_any_vector(TYPE *vars, size_t nelems, \ + const int *status, int cond, TYPE *values) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t i = 0, found_idx = SIZE_MAX, num_ignored = 0; \ + int cmpret = 0; \ + \ + if (status) { \ + for (i = 0; i < nelems; i++) { \ + if (status[i]) num_ignored++; \ + } \ + } \ + if (nelems == 0 || num_ignored == nelems) { \ + shmem_transport_probe(); \ + return SIZE_MAX; \ + } \ + \ + SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ + size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ + (RAND_MAX + 1.0) * (double) nelems); \ + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ + \ + while (!cmpret) { \ + for (i = 0; i < nelems; i++) { \ + size_t idx = (i + start_idx) % nelems; \ + if (status == NULL || !status[idx]) { \ + SHMEM_TEST(cond, &vars[idx], values[idx], cmpret); \ + if (cmpret) { \ + 
found_idx = idx; \ + break; \ + } \ + } \ + } \ + if (!cmpret) shmem_transport_probe(); \ + } \ + \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ + return found_idx; \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ANY_VECTOR') @@ -393,11 +393,11 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_ANY_VECTOR') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(indices, status, sizeof(size_t) * nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, indices, sizeof(TYPE) * nelems, \ - sizeof(size_t) * nelems, 0); \ + sizeof(size_t) * nelems, 0, 1); \ SHMEM_ERR_CHECK_CMP_OP(cond); \ \ size_t i = 0, ncompleted = 0, num_ignored = 0; \ @@ -441,11 +441,11 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_SOME') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(indices, status, sizeof(size_t) * nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, indices, sizeof(TYPE) * nelems, \ - sizeof(size_t) * nelems, 0); \ + sizeof(size_t) * nelems, 0, 1); \ SHMEM_ERR_CHECK_CMP_OP(cond); \ \ size_t i = 0, ncompleted = 0, num_ignored = 0; \ @@ -503,141 +503,141 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_WAIT_UNTIL_SOME_VECTOR') SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST') -#define SHMEM_DEF_TEST_ALL(STYPE,TYPE) \ - int SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_test_all(TYPE *vars, size_t nelems, const int *status, int cond, \ - TYPE value) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - 
SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t i = 0; \ - \ - for (i = 0; i < nelems; i++) { \ - if (status == NULL || !status[i]) { \ - int cmpret; \ - SHMEM_TEST(cond, &vars[i], value, cmpret); \ - if (!cmpret) { \ - shmem_transport_probe(); \ - return 0; \ - } \ - } \ - } \ - \ - shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ - return 1; \ +#define SHMEM_DEF_TEST_ALL(STYPE,TYPE) \ + int SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_test_all(TYPE *vars, size_t nelems, const int *status, int cond, \ + TYPE value) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t i = 0; \ + \ + for (i = 0; i < nelems; i++) { \ + if (status == NULL || !status[i]) { \ + int cmpret; \ + SHMEM_TEST(cond, &vars[i], value, cmpret); \ + if (!cmpret) { \ + shmem_transport_probe(); \ + return 0; \ + } \ + } \ + } \ + \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ + return 1; \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ALL') -#define SHMEM_DEF_TEST_ALL_VECTOR(STYPE,TYPE) \ - int SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_test_all_vector(TYPE *vars, size_t nelems, const int *status, int cond, \ - TYPE *values) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t i = 0; \ - \ - for (i = 0; i < nelems; i++) { \ - if (status == NULL || !status[i]) { \ - int cmpret; \ - SHMEM_TEST(cond, &vars[i], values[i], cmpret); \ - if (!cmpret) { \ - shmem_transport_probe(); \ - return 0; \ - } \ - } \ - } \ - \ - shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ - return 1; \ +#define SHMEM_DEF_TEST_ALL_VECTOR(STYPE,TYPE) \ + int SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_test_all_vector(TYPE 
*vars, size_t nelems, const int *status, int cond, \ + TYPE *values) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t i = 0; \ + \ + for (i = 0; i < nelems; i++) { \ + if (status == NULL || !status[i]) { \ + int cmpret; \ + SHMEM_TEST(cond, &vars[i], values[i], cmpret); \ + if (!cmpret) { \ + shmem_transport_probe(); \ + return 0; \ + } \ + } \ + } \ + \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ + return 1; \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ALL_VECTOR') -#define SHMEM_DEF_TEST_ANY(STYPE,TYPE) \ - size_t SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_test_any(TYPE *vars, size_t nelems, const int *status, \ - int cond, TYPE value) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t found_idx = SIZE_MAX, i = 0; \ - SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ - size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ - (RAND_MAX + 1.0) * (double) nelems); \ - SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ - \ - for (i = 0; i < nelems; i++) { \ - int cmpret = 0; \ - size_t idx = (i + start_idx) % nelems; \ - if (status == NULL || !status[idx]) { \ - SHMEM_TEST(cond, &vars[idx], value, cmpret); \ - if (cmpret) { \ - found_idx = idx; \ - break; \ - } \ - } \ - } \ - if (found_idx != SIZE_MAX) { \ - shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ - } else \ - shmem_transport_probe(); \ - \ - return found_idx; \ +#define SHMEM_DEF_TEST_ANY(STYPE,TYPE) \ + size_t SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_test_any(TYPE *vars, size_t nelems, const int *status, \ + int cond, TYPE value) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + 
SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t found_idx = SIZE_MAX, i = 0; \ + SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ + size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ + (RAND_MAX + 1.0) * (double) nelems); \ + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ + \ + for (i = 0; i < nelems; i++) { \ + int cmpret = 0; \ + size_t idx = (i + start_idx) % nelems; \ + if (status == NULL || !status[idx]) { \ + SHMEM_TEST(cond, &vars[idx], value, cmpret); \ + if (cmpret) { \ + found_idx = idx; \ + break; \ + } \ + } \ + } \ + if (found_idx != SIZE_MAX) { \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ + } else \ + shmem_transport_probe(); \ + \ + return found_idx; \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ANY') -#define SHMEM_DEF_TEST_ANY_VECTOR(STYPE,TYPE) \ - size_t SHMEM_FUNCTION_ATTRIBUTES \ - shmem_##STYPE##_test_any_vector(TYPE *vars, size_t nelems, const int *status, \ - int cond, TYPE *values) \ - { \ - SHMEM_ERR_CHECK_INITIALIZED(); \ - SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ - SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0); \ - SHMEM_ERR_CHECK_CMP_OP(cond); \ - \ - size_t found_idx = SIZE_MAX, i = 0; \ - SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ - size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ - (RAND_MAX + 1.0) * (double) nelems); \ - SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ - \ - for (i = 0; i < nelems; i++) { \ - int cmpret = 0; \ - size_t idx = (i + start_idx) % nelems; \ - if (status == NULL || !status[idx]) { \ - SHMEM_TEST(cond, &vars[idx], values[idx], cmpret); \ - if (cmpret) { \ - found_idx = idx; \ - break; \ - } \ - } \ - } \ - if (found_idx != SIZE_MAX) { \ - shmem_internal_membar_acq_rel(); \ - shmem_transport_syncmem(); \ - } else \ - shmem_transport_probe(); \ - \ - return 
found_idx; \ +#define SHMEM_DEF_TEST_ANY_VECTOR(STYPE,TYPE) \ + size_t SHMEM_FUNCTION_ATTRIBUTES \ + shmem_##STYPE##_test_any_vector(TYPE *vars, size_t nelems, const int *status, \ + int cond, TYPE *values) \ + { \ + SHMEM_ERR_CHECK_INITIALIZED(); \ + SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ + SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, sizeof(int) * nelems, 0, 1); \ + SHMEM_ERR_CHECK_CMP_OP(cond); \ + \ + size_t found_idx = SIZE_MAX, i = 0; \ + SHMEM_MUTEX_LOCK(shmem_internal_mutex_rand_r); \ + size_t start_idx = (size_t) (rand_r(&shmem_internal_rand_seed) / \ + (RAND_MAX + 1.0) * (double) nelems); \ + SHMEM_MUTEX_UNLOCK(shmem_internal_mutex_rand_r); \ + \ + for (i = 0; i < nelems; i++) { \ + int cmpret = 0; \ + size_t idx = (i + start_idx) % nelems; \ + if (status == NULL || !status[idx]) { \ + SHMEM_TEST(cond, &vars[idx], values[idx], cmpret); \ + if (cmpret) { \ + found_idx = idx; \ + break; \ + } \ + } \ + } \ + if (found_idx != SIZE_MAX) { \ + shmem_internal_membar_acq_rel(); \ + shmem_transport_syncmem(); \ + } else \ + shmem_transport_probe(); \ + \ + return found_idx; \ } SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ANY_VECTOR') @@ -651,11 +651,11 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_ANY_VECTOR') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(indices, status, sizeof(size_t) * nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, indices, sizeof(TYPE) * nelems, \ - sizeof(size_t) * nelems, 0); \ + sizeof(size_t) * nelems, 0, 1); \ SHMEM_ERR_CHECK_CMP_OP(cond); \ \ size_t i = 0, ncompleted = 0, num_ignored = 0; \ @@ -697,11 +697,11 @@ SHMEM_BIND_C_SYNC(`SHMEM_DEF_TEST_SOME') SHMEM_ERR_CHECK_INITIALIZED(); \ SHMEM_ERR_CHECK_SYMMETRIC(vars, sizeof(TYPE)); \ SHMEM_ERR_CHECK_OVERLAP(indices, status, sizeof(size_t) * 
nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, status, sizeof(TYPE) * nelems, \ - sizeof(int) * nelems, 0); \ + sizeof(int) * nelems, 0, 1); \ SHMEM_ERR_CHECK_OVERLAP(vars, indices, sizeof(TYPE) * nelems, \ - sizeof(size_t) * nelems, 0); \ + sizeof(size_t) * nelems, 0, 1); \ SHMEM_ERR_CHECK_CMP_OP(cond); \ \ size_t i = 0, ncompleted = 0, num_ignored = 0; \