From 122b167124e910aa897c544d7947c1c8613b6f8e Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Wed, 13 Sep 2023 14:41:00 -0700 Subject: [PATCH 01/11] Update kp_sampler_skip.cpp Putting in tool_invoked_fence code. --- common/kokkos-sampler/kp_sampler_skip.cpp | 77 ++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 9c53bcc5c..1590ca176 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -40,6 +40,51 @@ void kokkosp_request_tool_settings(const uint32_t, } } +void get_global_fence_choice() { + // re-read environment variable to get most accurate value + const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); + if (NULL != tool_globFence_str) { + tool_globFence = atoi(tool_globFence_str); + } +} + +// set of functions from Kokkos ToolProgrammingInterface (includes fence) +Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs; + +uint32_t getDeviceID(uint32_t devid_in) { + int num_device_bits = 7; + int num_instance_bits = 17; + return (~((uint32_t(-1)) << num_device_bits)) & + (devid_in >> num_instance_bits); +} + +void invoke_ktools_fence(uint32_t devID) { + if (tpi_funcs.fence != nullptr) { + tpi_funcs.fence(devID); + if (tool_verbosity > 1) { + printf("KokkosP: Sampler utility sucessfully invoked " + " tool-induced fence on device %d\n", getDeviceID(devID)); + } + } else { + printf( + "KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked " + "Fence is NULL!\n"); + exit(-1); + } +} + +void kokkosp_provide_tool_programming_interface( + uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) { + if (!num_funcs) { + if (tool_verbosity > 0) + printf( + "KokkosP: Note: Number of functions in Tools Programming Interface " + "is 0!\n"); + } + tpi_funcs = *funcsFromTPI; +} + + void 
kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); @@ -164,6 +209,11 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); } + get_global_fence_choice(); // re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(devID); + } if (NULL != beginForCallee) { uint64_t nestedkID = 0; (*beginForCallee)(name, devID, &nestedkID); @@ -180,6 +230,11 @@ void kokkosp_end_parallel_for(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); } + get_global_fence_choice(); // re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(devID); + } (*endForCallee)(retrievedNestedkID); } } @@ -197,6 +252,11 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, } if (NULL != beginScanCallee) { uint64_t nestedkID = 0; + get_global_fence_choice(); // re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(devID); + } (*beginScanCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); } @@ -211,6 +271,11 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); } + get_global_fence_choice(); // re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(devID); + } (*endScanCallee)(retrievedNestedkID); } } @@ -226,9 +291,14 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); } - + if (NULL != beginReduceCallee) { uint64_t nestedkID = 0; + get_global_fence_choice(); // 
re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(devID); + } (*beginReduceCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); } @@ -243,6 +313,11 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); } + get_global_fence_choice(); // re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(devID); + } (*endScanCallee)(retrievedNestedkID); } } From a2e73a0058fe8c22377ca5b2eaca875f3a837ea9 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Wed, 13 Sep 2023 14:48:09 -0700 Subject: [PATCH 02/11] Update kp_sampler_skip.cpp Fixing tool-induced fences to always fence on the device with DevID 0. Fencing with DevID will be done in a subsequent patch, where a pair object will be used in the hash table to capture the begin sample's information. Note that the pair/tuple object can also capture other state information to store between the beginning of a sampling event and its end.
--- common/kokkos-sampler/kp_sampler_skip.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 1590ca176..accca66db 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -212,7 +212,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } if (NULL != beginForCallee) { uint64_t nestedkID = 0; @@ -233,7 +233,7 @@ void kokkosp_end_parallel_for(const uint64_t kID) { get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } (*endForCallee)(retrievedNestedkID); } @@ -255,7 +255,7 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } (*beginScanCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); @@ -274,7 +274,7 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } (*endScanCallee)(retrievedNestedkID); } @@ -297,7 +297,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } (*beginReduceCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); @@ -316,7 +316,7 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { get_global_fence_choice(); // 
re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } (*endScanCallee)(retrievedNestedkID); } From ac8d239e3eeee043d56dcba1087b9a92a96e2529 Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Thu, 14 Sep 2023 08:57:25 -0700 Subject: [PATCH 03/11] update for prints of sampler --- common/kokkos-sampler/kp_sampler_skip.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index accca66db..3332aab54 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -212,7 +212,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(0); + invoke_ktools_fence(devID); } if (NULL != beginForCallee) { uint64_t nestedkID = 0; @@ -255,7 +255,7 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(0); + invoke_ktools_fence(devID); } (*beginScanCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); @@ -297,7 +297,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, get_global_fence_choice(); // re-read environment variable to get most // accurate if (tool_globFence) { - invoke_ktools_fence(0); + invoke_ktools_fence(devID); } (*beginReduceCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); From 8dd9724e7064365d770ce03ad7b8b40a9f503531 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 14 Sep 2023 09:57:14 -0700 Subject: [PATCH 04/11] sample skip: applied clang format --- common/kokkos-sampler/kp_sampler_skip.cpp | 45 ++++++++++++----------- 1 file changed, 
23 insertions(+), 22 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index accca66db..8a2950275 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -62,15 +62,17 @@ void invoke_ktools_fence(uint32_t devID) { if (tpi_funcs.fence != nullptr) { tpi_funcs.fence(devID); if (tool_verbosity > 1) { - printf("KokkosP: Sampler utility sucessfully invoked " - " tool-induced fence on device %d\n", getDeviceID(devID)); + printf( + "KokkosP: Sampler utility sucessfully invoked " + " tool-induced fence on device %d\n", + getDeviceID(devID)); } } else { - printf( + printf( "KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked " "Fence is NULL!\n"); - exit(-1); - } + exit(-1); + } } void kokkosp_provide_tool_programming_interface( @@ -84,7 +86,6 @@ void kokkosp_provide_tool_programming_interface( tpi_funcs = *funcsFromTPI; } - void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); @@ -231,10 +232,10 @@ void kokkosp_end_parallel_for(const uint64_t kID) { (unsigned long long)(kID)); } get_global_fence_choice(); // re-read environment variable to get most - // accurate + // accurate if (tool_globFence) { invoke_ktools_fence(0); - } + } (*endForCallee)(retrievedNestedkID); } } @@ -253,9 +254,9 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, if (NULL != beginScanCallee) { uint64_t nestedkID = 0; get_global_fence_choice(); // re-read environment variable to get most - // accurate + // accurate if (tool_globFence) { - invoke_ktools_fence(0); + invoke_ktools_fence(0); } (*beginScanCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); @@ -271,11 +272,11 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", 
(unsigned long long)(kID)); } - get_global_fence_choice(); // re-read environment variable to get most - // accurate - if (tool_globFence) { - invoke_ktools_fence(0); - } + get_global_fence_choice(); // re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(0); + } (*endScanCallee)(retrievedNestedkID); } } @@ -291,11 +292,11 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); } - + if (NULL != beginReduceCallee) { uint64_t nestedkID = 0; get_global_fence_choice(); // re-read environment variable to get most - // accurate + // accurate if (tool_globFence) { invoke_ktools_fence(0); } @@ -313,11 +314,11 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); } - get_global_fence_choice(); // re-read environment variable to get most - // accurate - if (tool_globFence) { - invoke_ktools_fence(0); - } + get_global_fence_choice(); // re-read environment variable to get most + // accurate + if (tool_globFence) { + invoke_ktools_fence(0); + } (*endScanCallee)(retrievedNestedkID); } } From ada1c7156ec06cd75672eb7b66342943be6c9b15 Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Thu, 14 Sep 2023 11:01:46 -0700 Subject: [PATCH 05/11] fixing kp samplere --- common/kokkos-sampler/Makefile | 2 +- common/kokkos-sampler/kp_sampler_skip.cpp | 5 +++-- profiling/all/kp_core.hpp | 10 +++++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/common/kokkos-sampler/Makefile b/common/kokkos-sampler/Makefile index 862cae8fa..ce5f56ca1 100644 --- a/common/kokkos-sampler/Makefile +++ b/common/kokkos-sampler/Makefile @@ -1,4 +1,4 @@ -CXX = clang++ +CXX = g++ CXXFLAGS = -O3 -std=c++17 -g diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 3332aab54..c96ed3f10 100644 --- 
a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -81,8 +81,9 @@ void kokkosp_provide_tool_programming_interface( "KokkosP: Note: Number of functions in Tools Programming Interface " "is 0!\n"); } + tpi_funcs = *funcsFromTPI; -} + } void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, @@ -329,8 +330,8 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { extern "C" { namespace impl = KokkosTools::Sampler; - EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) +EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(impl::kokkosp_provide_tool_programming_interface) EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp index 5cb5ed391..c46210be7 100644 --- a/profiling/all/kp_core.hpp +++ b/profiling/all/kp_core.hpp @@ -48,14 +48,22 @@ using Kokkos::Tools::SpaceHandle; #define EXPOSE_PROFILE_EVENT(FUNC_NAME) #define EXPOSE_BEGIN_FENCE(FUNC_NAME) #define EXPOSE_END_FENCE(FUNC_NAME) +#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) + #else +#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_provide_tool_programming_interface( \ + const uint32_t num_actions, Kokkos_Tools_ToolProgrammingInterface* ptpi) { \ + FUNC_NAME(num_actions, ptpi); \ + } + #define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \ __attribute__((weak)) void kokkosp_request_tool_settings( \ const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \ FUNC_NAME(num_actions, settings); \ - } + } #define EXPOSE_INIT(FUNC_NAME) \ __attribute__((weak)) void kokkosp_init_library( \ From f6e81b5062fe518e6511a9d6887e4261eb676617 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 14 Sep 2023 11:27:02 -0700 Subject: [PATCH 06/11] kp sampler applied clang format --- 
common/kokkos-sampler/kp_sampler_skip.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 696cecd32..e91714e05 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -83,9 +83,9 @@ void kokkosp_provide_tool_programming_interface( "KokkosP: Note: Number of functions in Tools Programming Interface " "is 0!\n"); } - + tpi_funcs = *funcsFromTPI; - } +} void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { @@ -332,7 +332,8 @@ extern "C" { namespace impl = KokkosTools::Sampler; EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) -EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(impl::kokkosp_provide_tool_programming_interface) +EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE( + impl::kokkosp_provide_tool_programming_interface) EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) From d42680d9cafb9937e9c3c2f5c6c05f403b1332ed Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 14 Sep 2023 12:04:54 -0700 Subject: [PATCH 07/11] applied clang format to kp_core --- profiling/all/kp_core.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/profiling/all/kp_core.hpp b/profiling/all/kp_core.hpp index c46210be7..c63db1863 100644 --- a/profiling/all/kp_core.hpp +++ b/profiling/all/kp_core.hpp @@ -50,20 +50,20 @@ using Kokkos::Tools::SpaceHandle; #define EXPOSE_END_FENCE(FUNC_NAME) #define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) - #else -#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \ - __attribute__((weak)) void kokkosp_provide_tool_programming_interface( \ - const uint32_t num_actions, Kokkos_Tools_ToolProgrammingInterface* ptpi) { \ - FUNC_NAME(num_actions, 
ptpi); \ +#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \ + __attribute__((weak)) void kokkosp_provide_tool_programming_interface( \ + const uint32_t num_actions, \ + Kokkos_Tools_ToolProgrammingInterface* ptpi) { \ + FUNC_NAME(num_actions, ptpi); \ } #define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \ __attribute__((weak)) void kokkosp_request_tool_settings( \ const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \ FUNC_NAME(num_actions, settings); \ - } + } #define EXPOSE_INIT(FUNC_NAME) \ __attribute__((weak)) void kokkosp_init_library( \ From 889af6d288519ac5acf27cc0bf8752799287aa3f Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 5 Oct 2023 10:24:22 -0700 Subject: [PATCH 08/11] kp_sampler_skip.cpp: removing glob fence check --- common/kokkos-sampler/kp_sampler_skip.cpp | 24 +---------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index e91714e05..f28098b47 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -40,14 +40,6 @@ void kokkosp_request_tool_settings(const uint32_t, } } -void get_global_fence_choice() { - // re-read environment variable to get most accurate value - const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); - if (NULL != tool_globFence_str) { - tool_globFence = atoi(tool_globFence_str); - } -} - // set of functions from Kokkos ToolProgrammingInterface (includes fence) Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs; @@ -83,7 +75,6 @@ void kokkosp_provide_tool_programming_interface( "KokkosP: Note: Number of functions in Tools Programming Interface " "is 0!\n"); } - tpi_funcs = *funcsFromTPI; } @@ -211,10 +202,8 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); } - 
get_global_fence_choice(); // re-read environment variable to get most - // accurate if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } if (NULL != beginForCallee) { uint64_t nestedkID = 0; @@ -232,8 +221,6 @@ void kokkosp_end_parallel_for(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); } - get_global_fence_choice(); // re-read environment variable to get most - // accurate if (tool_globFence) { invoke_ktools_fence(0); } @@ -254,8 +241,6 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, } if (NULL != beginScanCallee) { uint64_t nestedkID = 0; - get_global_fence_choice(); // re-read environment variable to get most - // accurate if (tool_globFence) { invoke_ktools_fence(0); } @@ -273,8 +258,6 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); } - get_global_fence_choice(); // re-read environment variable to get most - // accurate if (tool_globFence) { invoke_ktools_fence(0); } @@ -293,11 +276,8 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); } - if (NULL != beginReduceCallee) { uint64_t nestedkID = 0; - get_global_fence_choice(); // re-read environment variable to get most - // accurate if (tool_globFence) { invoke_ktools_fence(devID); } @@ -315,8 +295,6 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); } - get_global_fence_choice(); // re-read environment variable to get most - // accurate if (tool_globFence) { invoke_ktools_fence(0); } From 0f625365a1d4b1649677ec9916cf29733a348bee Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 5 Oct 2023 15:38:36 -0400 Subject: [PATCH 09/11] 
kp_sampler_skip.cpp: invoke_ktools_fence on devID=0 Passing devID to invoke_ktools_fence() instead of 0 is in a separate PR. Checking that the fence is done only on devID has not been tested in this PR and is not directly related to it. --- common/kokkos-sampler/kp_sampler_skip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index f28098b47..3645c8ab7 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -279,7 +279,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, if (NULL != beginReduceCallee) { uint64_t nestedkID = 0; if (tool_globFence) { - invoke_ktools_fence(devID); + invoke_ktools_fence(0); } (*beginReduceCallee)(name, devID, &nestedkID); infokIDSample.insert({*kID, nestedkID}); From bc2c87dd7e71332daab2982d9387b6450efd00fd Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 12 Oct 2023 10:32:15 -0700 Subject: [PATCH 10/11] Update kp_sampler_skip.cpp --- common/kokkos-sampler/kp_sampler_skip.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 3645c8ab7..5e496a451 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -33,11 +33,7 @@ static endFunction endReduceCallee = NULL; void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { - if (0 == tool_globFence) { settings->requires_global_fencing = false; - } else { - settings->requires_global_fencing = true; - } } // set of functions from Kokkos ToolProgrammingInterface (includes fence) From 1f0adb4717d5ce8c5c5a35690c8ea8e35e716e3e Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 12 Oct 2023 10:39:14 -0700 Subject: [PATCH 11/11] applied clang format to
kp_sampler_skip --- common/kokkos-sampler/kp_sampler_skip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 5e496a451..9131e420f 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -33,7 +33,7 @@ static endFunction endReduceCallee = NULL; void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { - settings->requires_global_fencing = false; + settings->requires_global_fencing = false; } // set of functions from Kokkos ToolProgrammingInterface (includes fence)