Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fence on sample only #209

Merged
merged 13 commits into from
Oct 12, 2023
2 changes: 1 addition & 1 deletion common/kokkos-sampler/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CXX = clang++
CXX = g++

CXXFLAGS = -O3 -std=c++17 -g

Expand Down
80 changes: 79 additions & 1 deletion common/kokkos-sampler/kp_sampler_skip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,53 @@ void kokkosp_request_tool_settings(const uint32_t,
}
}

void get_global_fence_choice() {
// re-read environment variable to get most accurate value
const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES");
if (NULL != tool_globFence_str) {
tool_globFence = atoi(tool_globFence_str);
}
}

// Set of functions from the Kokkos ToolProgrammingInterface (includes fence).
// Assigned in kokkosp_provide_tool_programming_interface; its fence member is
// the callback that invoke_ktools_fence checks for null and then calls.
Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs;

// Extracts the device-ID field from a packed 32-bit device identifier.
//
// The low 17 bits (the "instance" bits) are shifted out and the next 7 bits
// (the "device" bits) are kept; any higher bits are masked away.
//
// devid_in: packed identifier.
// Returns:  value of bits 17..23 of devid_in, in the range [0, 127].
uint32_t getDeviceID(uint32_t devid_in) {
  constexpr uint32_t kInstanceBits = 17;
  constexpr uint32_t kDeviceBits   = 7;
  // Mask with the low kDeviceBits bits set (0x7F).
  constexpr uint32_t kDeviceMask = (uint32_t{1} << kDeviceBits) - 1;

  const uint32_t withoutInstance = devid_in >> kInstanceBits;
  return withoutInstance & kDeviceMask;
}

// Invokes the fence callback stored in tpi_funcs for the given device.
//
// devID: packed device identifier, passed unchanged to the fence callback.
//
// If no fence callback has been provided (tpi_funcs.fence is null) a fatal
// message is printed and the process exits with status -1. When
// tool_verbosity is greater than 1, a confirmation naming the device ID
// (as decoded by getDeviceID) is printed after the fence.
void invoke_ktools_fence(uint32_t devID) {
  if (tpi_funcs.fence == nullptr) {
    printf(
        "KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
        "Fence is NULL!\n");
    exit(-1);
  }

  tpi_funcs.fence(devID);

  if (tool_verbosity > 1) {
    // getDeviceID returns an unsigned value, so print it with %u (the
    // original used %d, a signed/unsigned format mismatch).
    printf(
        "KokkosP: Sampler utility successfully invoked tool-induced fence on "
        "device %u\n",
        static_cast<unsigned>(getDeviceID(devID)));
  }
}

// Receives the Tools Programming Interface function table and stores a copy
// in tpi_funcs.
//
// num_funcs:    number of functions reported for the interface; a value of 0
//               only triggers a note when tool_verbosity is greater than 0.
// funcsFromTPI: pointer to the function table to copy. A null pointer is
//               ignored (tpi_funcs keeps its current contents) instead of
//               being dereferenced.
void kokkosp_provide_tool_programming_interface(
    uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
  if (!num_funcs) {
    if (tool_verbosity > 0)
      printf(
          "KokkosP: Note: Number of functions in Tools Programming Interface "
          "is 0!\n");
  }

  // Guard against a null table: copying through it would crash here.
  if (funcsFromTPI == nullptr) {
    if (tool_verbosity > 0)
      printf(
          "KokkosP: Note: Tools Programming Interface pointer is NULL; "
          "keeping the previously stored interface.\n");
    return;
  }

  tpi_funcs = *funcsFromTPI;
}

void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t devInfoCount, void* deviceInfo) {
const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE");
Expand Down Expand Up @@ -164,6 +211,11 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}
get_global_fence_choice(); // re-read environment variable to get most
// accurate
vlkale marked this conversation as resolved.
Show resolved Hide resolved
if (tool_globFence) {
invoke_ktools_fence(devID);
}
if (NULL != beginForCallee) {
uint64_t nestedkID = 0;
(*beginForCallee)(name, devID, &nestedkID);
Expand All @@ -180,6 +232,11 @@ void kokkosp_end_parallel_for(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
get_global_fence_choice(); // re-read environment variable to get most
// accurate
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*endForCallee)(retrievedNestedkID);
}
}
Expand All @@ -197,6 +254,11 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
}
if (NULL != beginScanCallee) {
uint64_t nestedkID = 0;
get_global_fence_choice(); // re-read environment variable to get most
// accurate
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*beginScanCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
Expand All @@ -211,6 +273,11 @@ void kokkosp_end_parallel_scan(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
get_global_fence_choice(); // re-read environment variable to get most
// accurate
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*endScanCallee)(retrievedNestedkID);
}
}
Expand All @@ -229,6 +296,11 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,

if (NULL != beginReduceCallee) {
uint64_t nestedkID = 0;
get_global_fence_choice(); // re-read environment variable to get most
// accurate
if (tool_globFence) {
invoke_ktools_fence(devID);
}
(*beginReduceCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
Expand All @@ -243,6 +315,11 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
get_global_fence_choice(); // re-read environment variable to get most
// accurate
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*endScanCallee)(retrievedNestedkID);
}
}
Expand All @@ -254,8 +331,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
extern "C" {

namespace impl = KokkosTools::Sampler;

EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(
impl::kokkosp_provide_tool_programming_interface)
EXPOSE_INIT(impl::kokkosp_init_library)
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
Expand Down
8 changes: 8 additions & 0 deletions profiling/all/kp_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,17 @@ using Kokkos::Tools::SpaceHandle;
#define EXPOSE_PROFILE_EVENT(FUNC_NAME)
#define EXPOSE_BEGIN_FENCE(FUNC_NAME)
#define EXPOSE_END_FENCE(FUNC_NAME)
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME)

#else

// Defines the weakly-linked kokkosp_provide_tool_programming_interface entry
// point and forwards both of its arguments (the action count and the pointer
// to the programming-interface struct) unchanged to FUNC_NAME.
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \
__attribute__((weak)) void kokkosp_provide_tool_programming_interface( \
const uint32_t num_actions, \
Kokkos_Tools_ToolProgrammingInterface* ptpi) { \
FUNC_NAME(num_actions, ptpi); \
}

#define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \
__attribute__((weak)) void kokkosp_request_tool_settings( \
const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \
Expand Down