Skip to content

Commit

Permalink
Merge pull request #209 from vlkale/fenceOnSampleOnly
Browse files Browse the repository at this point in the history
Fence on sample only
  • Loading branch information
crtrott authored Oct 12, 2023
2 parents c3e85a6 + 1f0adb4 commit 2ddedef
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 6 deletions.
2 changes: 1 addition & 1 deletion common/kokkos-sampler/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CXX = clang++
CXX = g++

CXXFLAGS = -O3 -std=c++17 -g

Expand Down
62 changes: 57 additions & 5 deletions common/kokkos-sampler/kp_sampler_skip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,47 @@ static endFunction endReduceCallee = NULL;

// Tool-settings callback: tells Kokkos that this tool does not need a global
// fence around every profiling callback. When fencing is wanted
// (tool_globFence), the sampler issues the fence itself through
// invoke_ktools_fence() at the callbacks it actually samples.
//
// The first parameter is unnamed and unused.
// `settings` is written to: requires_global_fencing is always set to false.
void kokkosp_request_tool_settings(const uint32_t,
                                   Kokkos_Tools_ToolSettings* settings) {
  settings->requires_global_fencing = false;
}

// Set of functions from the Kokkos ToolProgrammingInterface (includes fence).
// Assigned in kokkosp_provide_tool_programming_interface(); its `fence` member
// is the function invoke_ktools_fence() checks for null and then calls.
Kokkos::Tools::Experimental::ToolProgrammingInterface tpi_funcs;

// Extracts the device-ID field from a packed device identifier.
//
// The low 17 bits of `devid_in` (the "instance" bits) are shifted out, and the
// next 7 bits (the "device" bits) are returned; anything above them is masked
// off.
uint32_t getDeviceID(uint32_t devid_in) {
  constexpr uint32_t kInstanceBits = 17;
  constexpr uint32_t kDeviceBits   = 7;
  // Low kDeviceBits set: 0x7F.
  constexpr uint32_t kDeviceMask = (uint32_t{1} << kDeviceBits) - 1u;

  const uint32_t without_instance = devid_in >> kInstanceBits;
  return without_instance & kDeviceMask;
}

// Issues a fence on `devID` through the fence function stored in tpi_funcs.
//
// If the fence function pointer is set, it is called with `devID`; at
// verbosity levels above 1 a confirmation line is printed with the device ID
// decoded by getDeviceID(). If the pointer is null the sampler cannot fence at
// all, so a fatal message is printed and the process exits with status -1.
void invoke_ktools_fence(uint32_t devID) {
  if (tpi_funcs.fence != nullptr) {
    tpi_funcs.fence(devID);
    if (tool_verbosity > 1) {
      // getDeviceID() returns uint32_t, hence %u rather than %d.
      printf(
          "KokkosP: Sampler utility sucessfully invoked "
          " tool-induced fence on device %u\n",
          getDeviceID(devID));
    }
  } else {
    // No `settings` object exists in this scope; there is nothing to fall
    // back to, so report and abort.
    printf(
        "KokkosP: FATAL: Kokkos Tools Programming Interface's tool-invoked "
        "Fence is NULL!\n");
    exit(-1);
  }
}

// Callback that receives the Tools Programming Interface function table and
// stores a copy of it in tpi_funcs.
//
// num_funcs:    number of functions reported for the interface; a value of 0
//               only triggers a note when tool_verbosity is above 0.
// funcsFromTPI: pointer to the function table; dereferenced unconditionally.
void kokkosp_provide_tool_programming_interface(
    uint32_t num_funcs, Kokkos_Tools_ToolProgrammingInterface* funcsFromTPI) {
  const bool interface_is_empty = (num_funcs == 0);
  if (interface_is_empty && tool_verbosity > 0) {
    printf(
        "KokkosP: Note: Number of functions in Tools Programming Interface "
        "is 0!\n");
  }

  // The table is copied even when num_funcs is 0.
  tpi_funcs = *funcsFromTPI;
}

void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t devInfoCount, void* deviceInfo) {
const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE");
Expand Down Expand Up @@ -164,6 +198,9 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID,
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}
if (tool_globFence) {
invoke_ktools_fence(0);
}
if (NULL != beginForCallee) {
uint64_t nestedkID = 0;
(*beginForCallee)(name, devID, &nestedkID);
Expand All @@ -180,6 +217,9 @@ void kokkosp_end_parallel_for(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*endForCallee)(retrievedNestedkID);
infokIDSample.erase(kID);
}
Expand All @@ -198,6 +238,9 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID,
}
if (NULL != beginScanCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*beginScanCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
Expand All @@ -212,6 +255,9 @@ void kokkosp_end_parallel_scan(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*endScanCallee)(retrievedNestedkID);
infokIDSample.erase(kID);
}
Expand All @@ -228,9 +274,11 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID,
printf("KokkosP: sample %llu calling child-begin function...\n",
(unsigned long long)(*kID));
}

if (NULL != beginReduceCallee) {
uint64_t nestedkID = 0;
if (tool_globFence) {
invoke_ktools_fence(0);
}
(*beginReduceCallee)(name, devID, &nestedkID);
infokIDSample.insert({*kID, nestedkID});
}
Expand All @@ -245,6 +293,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
printf("KokkosP: sample %llu calling child-end function...\n",
(unsigned long long)(kID));
}
if (tool_globFence) {
  invoke_ktools_fence(0);
}
// This is the end-of-reduce callback, so dispatch to the child tool's reduce
// callee (not the scan callee). Checked here so that a missing reduce callback
// is skipped rather than dereferenced.
if (NULL != endReduceCallee) {
  (*endReduceCallee)(retrievedNestedkID);
}
infokIDSample.erase(kID);
}
Expand All @@ -257,8 +308,9 @@ void kokkosp_end_parallel_reduce(const uint64_t kID) {
extern "C" {

namespace impl = KokkosTools::Sampler;

EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings)
EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(
impl::kokkosp_provide_tool_programming_interface)
EXPOSE_INIT(impl::kokkosp_init_library)
EXPOSE_FINALIZE(impl::kokkosp_finalize_library)
EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for)
Expand Down
8 changes: 8 additions & 0 deletions profiling/all/kp_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,17 @@ using Kokkos::Tools::SpaceHandle;
#define EXPOSE_PROFILE_EVENT(FUNC_NAME)
#define EXPOSE_BEGIN_FENCE(FUNC_NAME)
#define EXPOSE_END_FENCE(FUNC_NAME)
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME)

#else

// Defines a weak-linkage function named
// kokkosp_provide_tool_programming_interface that forwards both of its
// arguments (a uint32_t count and a Kokkos_Tools_ToolProgrammingInterface
// pointer) to FUNC_NAME.
#define EXPOSE_PROVIDE_TOOL_PROGRAMMING_INTERFACE(FUNC_NAME) \
__attribute__((weak)) void kokkosp_provide_tool_programming_interface( \
const uint32_t num_actions, \
Kokkos_Tools_ToolProgrammingInterface* ptpi) { \
FUNC_NAME(num_actions, ptpi); \
}

#define EXPOSE_TOOL_SETTINGS(FUNC_NAME) \
__attribute__((weak)) void kokkosp_request_tool_settings( \
const uint32_t num_actions, Kokkos_Tools_ToolSettings* settings) { \
Expand Down

0 comments on commit 2ddedef

Please sign in to comment.