From fc36b9de7a1abdd739d7a27ff4a5b462ea4eedff Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 20 Apr 2023 21:31:28 -0400 Subject: [PATCH 01/22] Update kp_sampler_skip.cpp Added tool_random_mode and tool_periodic_mode to identify whether tool uses periodic sampling or random sampling (or possibly a combination of both (every 20th timestep, gather data with 50% probability). --- common/kokkos-sampler/kp_sampler_skip.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index fa6ea655e..eee6bb98e 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -28,6 +28,9 @@ static endFunction endReduceCallee = NULL; void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); + const char* tool_random_mode = getenv("KOKKOS_TOOLS_SAMPLER_RANDOMIZED"); + const char* tool_periodic_mode = getenv("KOKKOS_TOOLS_SAMPLER_PERIODIC"); + if (NULL != tool_verbose_str) { tool_verbosity = atoi(tool_verbose_str); } else { From abafd7ad3ce830f76ae7067dba77b9808c3baf1f Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:41:20 -0700 Subject: [PATCH 02/22] update to Makefile --- common/kokkos-sampler/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/kokkos-sampler/Makefile b/common/kokkos-sampler/Makefile index 16feae373..a1258b698 100644 --- a/common/kokkos-sampler/Makefile +++ b/common/kokkos-sampler/Makefile @@ -7,7 +7,7 @@ SHARED_CXXFLAGS= -shared -fPIC MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} +CXXFLAGS+=-I${MAKEFILE_PATH} -I../../profiling/all/ -I../makefile-only/ kp_sampler.so: ${MAKEFILE_PATH}kp_sampler_skip.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ ${MAKEFILE_PATH}kp_sampler_skip.cpp From 3bede6050d4048da5724db252f53417318eb266d Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:42:44 -0700 Subject: [PATCH 03/22] putting in fix to sampler skip with randomized probabilities --- common/kokkos-sampler/kp_sampler_skip.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index eee6bb98e..58a5e53a4 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -3,6 +3,8 @@ #include #include #include +#include "../../profiling/all/kp_core.hpp" +#include "kp_config.hpp" namespace KokkosTools { namespace Sampler { @@ -30,7 +32,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); const char* tool_random_mode = getenv("KOKKOS_TOOLS_SAMPLER_RANDOMIZED"); const char* tool_periodic_mode = getenv("KOKKOS_TOOLS_SAMPLER_PERIODIC"); - + if (NULL != tool_verbose_str) { tool_verbosity = atoi(tool_verbose_str); } else { @@ -124,12 +126,14 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, uniqID = 1; const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); + const char* tool_probability = getenv("KOKKOS_TOOLS_SAMPLER_PROBABILITY"); if (NULL != tool_sample) { kernelSampleSkip = atoi(tool_sample) + 1; } if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); + printf("KokkosP: Sampling probability set to: %s\n", tool_probability); } } @@ -223,11 +227,11 @@ extern "C" { namespace impl = KokkosTools::Sampler; -EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) -EXPOSE_INIT(impl::kokkosp_init_library) -EXPOSE_FINALIZE(impl::kokkosp_finalize_library) -EXPOSE_PUSH_REGION(impl::kokkosp_push_profile_region) -EXPOSE_POP_REGION(impl::kokkosp_pop_profile_region) +EXPOSE_TOOL_SETTINGS(kokkosp_request_tool_settings) +EXPOSE_INIT(kokkosp_init_library) +EXPOSE_FINALIZE(kokkosp_finalize_library) +EXPOSE_PUSH_REGION(kokkosp_push_profile_region) +EXPOSE_POP_REGION(kokkosp_pop_profile_region) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) From 7d2302d8c6b69721a289461a136c5de1899d24d4 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 8 Jun 2023 15:12:04 -0700 Subject: [PATCH 04/22] committing sampler formatted file --- common/kokkos-sampler/kp_sampler_skip.cpp | 156 +++++++++++----------- 1 file changed, 80 insertions(+), 76 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 5d480f001..0b8676911 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -9,11 +9,11 @@ namespace KokkosTools { namespace Sampler { -static uint64_t uniqID = 0; -static uint64_t kernelSampleSkip = 101; +static uint64_t uniqID = 0; +static uint64_t kernelSampleSkip = 101; static double kernelSampleProbability = 1.0; -static int tool_verbosity = 0; -static int tool_globFence = 0; +static int tool_verbosity = 0; +static int tool_globFence = 0; typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*); typedef void (*finalizeFunction)(); @@ -40,9 +40,9 @@ void kokkosp_request_tool_settings(const uint32_t, void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { - const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); + const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); - + if (NULL != tool_verbose_str) { tool_verbosity = atoi(tool_verbose_str); } else { @@ -53,7 +53,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } else { tool_globFence = 0; } - + char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); if (NULL == profileLibrary) { printf( @@ -137,31 +137,36 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, uniqID = 1; - const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); + const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); const char* tool_probability = getenv("KOKKOS_TOOLS_SAMPLER_PROBABILITY"); - const char* tool_periodprob_compose_str = getenv("KOKKOS_TOOLS_SAMPLER_PERIODPROB_COMPOSE"); - // composing the periodicity and probability parameter. Set to 1 if probability for - // each periodic sample. Set to 0 to default to probability even if periodicity (skip rate) is defined. + const char* tool_periodprob_compose_str = + getenv("KOKKOS_TOOLS_SAMPLER_PERIODPROB_COMPOSE"); + // composing the periodicity and probability parameter. Set to 1 if + // probability for each periodic sample. Set to 0 to default to probability + // even if periodicity (skip rate) is defined. if (NULL != tool_sample) { kernelSampleSkip = atoi(tool_sample) + 1; } if (NULL != tool_probability) { - // read sampling probability as an integer between 1 and 100, but - // programs reasons about probability as a double between 0.0 and 1.0. - kernelSampleProbability = (double(atoi(tool_probability)))/100.0; - srand48((unsigned)clock()); + // read sampling probability as an integer between 1 and 100, but + // programs reasons about probability as a double between 0.0 and 1.0. + kernelSampleProbability = (double(atoi(tool_probability))) / 100.0; + srand48((unsigned)clock()); } if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); - printf("KokkosP: Sampling probability set to: %s\n", tool_probability); + printf("KokkosP: Sampling probability set to: %s\n", tool_probability); } - if( (NULL != tool_probability) && (NULL !=tool_sample)) { - printf("KokkosP: Note that both probability and skip rate are set. Kokkos Tools Sampler utility will invoke Kokkos Tool child event with a probability at the skip rate.\n"); - } -} // end kokkosp_init_library + if ((NULL != tool_probability) && (NULL != tool_sample)) { + printf( + "KokkosP: Note that both probability and skip rate are set. Kokkos " + "Tools Sampler utility will invoke Kokkos Tool child event with a " + "probability at the skip rate.\n"); + } +} // end kokkosp_init_library void kokkosp_finalize_library() { if (NULL != finalizeProfileLibrary) (*finalizeProfileLibrary)(); @@ -170,33 +175,33 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; - if (((*kID) % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { + if (((*kID) % kernelSampleSkip) == 0) { + if (drand48() < kernelSampleProbability) { if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); - } - if (NULL != beginForCallee) { - (*beginForCallee)(name, devID, kID); + printf("KokkosP: sample %llu calling child-begin function...\n", + (unsigned long long)(*kID)); + } + if (NULL != beginForCallee) { + (*beginForCallee)(name, devID, kID); + } } - } - } -} // kokkosp_begin_parallel_for + } +} // kokkosp_begin_parallel_for void kokkosp_end_parallel_for(const uint64_t kID) { if ((kID % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); - } + if (drand48() < kernelSampleProbability) { + if (tool_verbosity > 0) { + printf("KokkosP: sample %llu calling child-end function...\n", + (unsigned long long)(kID)); + } - if (NULL != endForCallee) { - (*endForCallee)(kID); + if (NULL != endForCallee) { + (*endForCallee)(kID); + } } - } } -} // kokkosp_end_parallel_for +} // kokkosp_end_parallel_for void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { @@ -204,64 +209,63 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, if (((*kID) % kernelSampleSkip) == 0) { if (drand48() < kernelSampleProbability) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); - } + if (tool_verbosity > 0) { + printf("KokkosP: sample %llu calling child-begin function...\n", + (unsigned long long)(*kID)); + } - if (NULL != beginScanCallee) { - (*beginScanCallee)(name, devID, kID); + if (NULL != beginScanCallee) { + (*beginScanCallee)(name, devID, kID); + } } - } } -} // kokkosp_end_parallel_for +} // kokkosp_end_parallel_for void kokkosp_end_parallel_scan(const uint64_t kID) { - - if ((kID % kernelSampleSkip) == 0) { + if ((kID % kernelSampleSkip) == 0) { if (drand48() < kernelSampleProbability) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); - } - if (NULL != endScanCallee) { - (*endScanCallee)(kID); + if (tool_verbosity > 0) { + printf("KokkosP: sample %llu calling child-end function...\n", + (unsigned long long)(kID)); + } + if (NULL != endScanCallee) { + (*endScanCallee)(kID); + } } } - } -} // kokkosp_end_parallel_scan +} // kokkosp_end_parallel_scan void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if (((*kID) % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); - } + if (drand48() < kernelSampleProbability) { + if (tool_verbosity > 0) { + printf("KokkosP: sample %llu calling child-begin function...\n", + (unsigned long long)(*kID)); + } - if (NULL != beginReduceCallee) { - (*beginReduceCallee)(name, devID, kID); + if (NULL != beginReduceCallee) { + (*beginReduceCallee)(name, devID, kID); + } } - } - } -} // kokkosp_begin_parallel_reduce + } +} // kokkosp_begin_parallel_reduce void kokkosp_end_parallel_reduce(const uint64_t kID) { if ((kID % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); - } - if (NULL != endReduceCallee) { - (*endReduceCallee)(kID); + if (drand48() < kernelSampleProbability) { + if (tool_verbosity > 0) { + printf("KokkosP: sample %llu calling child-end function...\n", + (unsigned long long)(kID)); + } + if (NULL != endReduceCallee) { + (*endReduceCallee)(kID); + } } } - } -} // kokkosp_end_parallel_reduce +} // kokkosp_end_parallel_reduce } // namespace Sampler } // namespace KokkosTools From 5e4f762c60753afbe6780cf593edc4e0500ebb1d Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 8 Jun 2023 16:13:01 -0700 Subject: [PATCH 05/22] putting fixes for formatting into kp_sampler skip --- common/kokkos-sampler/kp_sampler_skip.cpp | 51 +++++++++++++++-------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 0b8676911..e14290826 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -6,12 +6,14 @@ #include "../../profiling/all/kp_core.hpp" #include "kp_config.hpp" #include +#include namespace KokkosTools { namespace Sampler { static uint64_t uniqID = 0; static uint64_t kernelSampleSkip = 101; static double kernelSampleProbability = 1.0; +static uint64_t tool_prob_num = 100; static int tool_verbosity = 0; static int tool_globFence = 0; @@ -40,6 +42,7 @@ void kokkosp_request_tool_settings(const uint32_t, void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { + printf("KokkosP: Sampler: hello! in init\n"); const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); @@ -65,7 +68,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, exit(-1); } } - + printf("KokkosP: hello!\n"); char* envBuffer = (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); strcpy(envBuffer, profileLibrary); @@ -139,8 +142,6 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const char* tool_sample = getenv("KOKKOS_TOOLS_SAMPLER_SKIP"); const char* tool_probability = getenv("KOKKOS_TOOLS_SAMPLER_PROBABILITY"); - const char* tool_periodprob_compose_str = - getenv("KOKKOS_TOOLS_SAMPLER_PERIODPROB_COMPOSE"); // composing the periodicity and probability parameter. Set to 1 if // probability for each periodic sample. Set to 0 to default to probability // even if periodicity (skip rate) is defined. @@ -152,9 +153,25 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, if (NULL != tool_probability) { // read sampling probability as an integer between 1 and 100, but // programs reasons about probability as a double between 0.0 and 1.0. - kernelSampleProbability = (double(atoi(tool_probability))) / 100.0; - srand48((unsigned)clock()); + tool_prob_num = atoi(tool_probability); + if (tool_prob_num > 100) { + printf( + "KokkosP: Tool sample probability was set to be greater than 100. " + "Setting to 100.\n"); + tool_prob_num = 100; + } else if (tool_prob_num < 0) { + printf( + "KokkosP: Tool sample probability was set to be less than 0. Setting " + "to 0.\n"); + tool_prob_num = 0; + kernelSampleProbability = 0.0; + } + printf("KokkosP: debug: kernelSampleProbability is %f\n", + kernelSampleProbability); } + // srand48((unsigned)clock()); + // seed48(0); + srand(time(NULL)); if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); @@ -176,7 +193,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = uniqID++; if (((*kID) % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { + if ((rand() % 100) < tool_prob_num) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); @@ -190,7 +207,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, void kokkosp_end_parallel_for(const uint64_t kID) { if ((kID % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { + if ((rand() % 100) < tool_prob_num) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); @@ -208,7 +225,7 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, *kID = uniqID++; if (((*kID) % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { + if ((rand() % 100) < tool_prob_num) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); @@ -223,7 +240,7 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, void kokkosp_end_parallel_scan(const uint64_t kID) { if ((kID % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { + if ((rand() % 100) < tool_prob_num) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); @@ -240,7 +257,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, *kID = uniqID++; if (((*kID) % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { + if ((rand() % 100) < tool_prob_num) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-begin function...\n", (unsigned long long)(*kID)); @@ -255,7 +272,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, void kokkosp_end_parallel_reduce(const uint64_t kID) { if ((kID % kernelSampleSkip) == 0) { - if (drand48() < kernelSampleProbability) { + if ((rand() % 100) < tool_prob_num) { if (tool_verbosity > 0) { printf("KokkosP: sample %llu calling child-end function...\n", (unsigned long long)(kID)); @@ -279,11 +296,11 @@ EXPOSE_INIT(kokkosp_init_library) EXPOSE_FINALIZE(kokkosp_finalize_library) EXPOSE_PUSH_REGION(kokkosp_push_profile_region) EXPOSE_POP_REGION(kokkosp_pop_profile_region) -EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) -EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) -EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) -EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) -EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) -EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) +EXPOSE_BEGIN_PARALLEL_FOR(kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(kokkosp_end_parallel_reduce) } // end extern "C" From 6269ab47aa29a31682a8c403ba0818db05633d2d Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 8 Jun 2023 16:22:00 -0700 Subject: [PATCH 06/22] fixed kp_sampler making tool_prob_num an int --- common/kokkos-sampler/kp_sampler_skip.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index e14290826..b941a3111 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -12,8 +12,7 @@ namespace KokkosTools { namespace Sampler { static uint64_t uniqID = 0; static uint64_t kernelSampleSkip = 101; -static double kernelSampleProbability = 1.0; -static uint64_t tool_prob_num = 100; +static int tool_prob_num = 100; static int tool_verbosity = 0; static int tool_globFence = 0; @@ -164,10 +163,9 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, "KokkosP: Tool sample probability was set to be less than 0. Setting " "to 0.\n"); tool_prob_num = 0; - kernelSampleProbability = 0.0; } - printf("KokkosP: debug: kernelSampleProbability is %f\n", - kernelSampleProbability); + printf("KokkosP: debug: kernelSampleProbability is %d\n", + tool_prob_num); } // srand48((unsigned)clock()); // seed48(0); From c7c724c7c8282ac5bc6fc18427ccb0474156bb18 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 8 Jun 2023 16:26:26 -0700 Subject: [PATCH 07/22] committed formatted sampler_skip.cpp --- common/kokkos-sampler/kp_sampler_skip.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index b941a3111..4e4d00e01 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -10,11 +10,11 @@ namespace KokkosTools { namespace Sampler { -static uint64_t uniqID = 0; -static uint64_t kernelSampleSkip = 101; +static uint64_t uniqID = 0; +static uint64_t kernelSampleSkip = 101; static int tool_prob_num = 100; -static int tool_verbosity = 0; -static int tool_globFence = 0; +static int tool_verbosity = 0; +static int tool_globFence = 0; typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*); typedef void (*finalizeFunction)(); @@ -162,10 +162,9 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, printf( "KokkosP: Tool sample probability was set to be less than 0. Setting " "to 0.\n"); - tool_prob_num = 0; + tool_prob_num = 0; } - printf("KokkosP: debug: kernelSampleProbability is %d\n", - tool_prob_num); + printf("KokkosP: debug: kernelSampleProbability is %d\n", tool_prob_num); } // srand48((unsigned)clock()); // seed48(0); From 9830c5843389495538424718a22edb030c9ca1da Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 8 Jun 2023 17:40:07 -0700 Subject: [PATCH 08/22] fixing randomized samples to obtain correct invocation number. --- common/kokkos-sampler/kp_sampler_skip.cpp | 128 ++++++++++++---------- 1 file changed, 69 insertions(+), 59 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 4e4d00e01..0f04adda8 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -41,7 +41,6 @@ void kokkosp_request_tool_settings(const uint32_t, void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { - printf("KokkosP: Sampler: hello! in init\n"); const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); @@ -67,7 +66,6 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, exit(-1); } } - printf("KokkosP: hello!\n"); char* envBuffer = (char*)malloc(sizeof(char) * (strlen(profileLibrary) + 1)); strcpy(envBuffer, profileLibrary); @@ -164,16 +162,19 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, "to 0.\n"); tool_prob_num = 0; } - printf("KokkosP: debug: kernelSampleProbability is %d\n", tool_prob_num); } // srand48((unsigned)clock()); // seed48(0); - srand(time(NULL)); if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); printf("KokkosP: Sampling probability set to: %s\n", tool_probability); + printf( + "KokkosP: seeding Random Number Generator using clock for " + "probabilistic sampling\n"); } + srand(time(NULL)); + if ((NULL != tool_probability) && (NULL != tool_sample)) { printf( "KokkosP: Note that both probability and skip rate are set. Kokkos " @@ -188,12 +189,17 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = uniqID++; - if (((*kID) % kernelSampleSkip) == 0) { + *kID = 0; + static uint64_t invocationNum; + ++invocationNum; + if ((invocationNum % kernelSampleSkip) == 0) { if ((rand() % 100) < tool_prob_num) { + *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); + printf( + "KokkosP: sample %llu on (a parallel_for on its invocation number " + "%d) calling child-begin function...\n", + (unsigned long long)(*kID), (int)invocationNum); } if (NULL != beginForCallee) { (*beginForCallee)(name, devID, kID); @@ -203,63 +209,69 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, } // kokkosp_begin_parallel_for void kokkosp_end_parallel_for(const uint64_t kID) { - if ((kID % kernelSampleSkip) == 0) { - if ((rand() % 100) < tool_prob_num) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); - } + if (kID > 0) { + if (tool_verbosity > 0) { + printf( + "KokkosP: sample %llu (a parallel_for) calling child-end " + "function...\n", + (unsigned long long)(kID)); + } - if (NULL != endForCallee) { - (*endForCallee)(kID); - } + if (NULL != endForCallee) { + (*endForCallee)(kID); } } } // kokkosp_end_parallel_for void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = uniqID++; - - if (((*kID) % kernelSampleSkip) == 0) { + *kID = 0; + static uint64_t invocationNum; + ++invocationNum; + if ((invocationNum % kernelSampleSkip) == 0) { if ((rand() % 100) < tool_prob_num) { + *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); + printf( + "KokkosP: sample %llu (parallel_scan on its invocation num %d) " + "calling child-begin function...\n", + (unsigned long long)(*kID), (int)invocationNum); } - if (NULL != beginScanCallee) { (*beginScanCallee)(name, devID, kID); } } } -} // kokkosp_end_parallel_for +} // kokkosp_begin_parallel_scan void kokkosp_end_parallel_scan(const uint64_t kID) { - if ((kID % kernelSampleSkip) == 0) { - if ((rand() % 100) < tool_prob_num) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); - } - if (NULL != endScanCallee) { - (*endScanCallee)(kID); - } + if (kID > 0) { + if (tool_verbosity > 0) { + printf( + "KokkosP: sample %llu (a parallel_scan) calling child-end " + "function...\n", + (unsigned long long)(kID)); + } + if (NULL != endScanCallee) { + (*endScanCallee)(kID); } } } // kokkosp_end_parallel_scan void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = uniqID++; - - if (((*kID) % kernelSampleSkip) == 0) { + *kID = 0; + static uint64_t invocationNum; + ++invocationNum; + if ((invocationNum % kernelSampleSkip) == 0) { if ((rand() % 100) < tool_prob_num) { if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-begin function...\n", - (unsigned long long)(*kID)); + printf( + "KokkosP: sample %llu (a parallel_reduce on its invocation number " + "%d) calling child-begin function...\n", + (unsigned long long)(*kID), (int)invocationNum); } - + *kID = 1; if (NULL != beginReduceCallee) { (*beginReduceCallee)(name, devID, kID); } @@ -268,15 +280,15 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, } // kokkosp_begin_parallel_reduce void kokkosp_end_parallel_reduce(const uint64_t kID) { - if ((kID % kernelSampleSkip) == 0) { - if ((rand() % 100) < tool_prob_num) { - if (tool_verbosity > 0) { - printf("KokkosP: sample %llu calling child-end function...\n", - (unsigned long long)(kID)); - } - if (NULL != endReduceCallee) { - (*endReduceCallee)(kID); - } + if (kID > 0) { + if (tool_verbosity > 0) { + printf( + "KokkosP: sample %llu (a parallel_reduce) calling child-end " + "function...\n", + (unsigned long long)(kID)); + } + if (NULL != endReduceCallee) { + (*endReduceCallee)(kID); } } } // kokkosp_end_parallel_reduce @@ -288,16 +300,14 @@ extern "C" { namespace impl = KokkosTools::Sampler; -EXPOSE_TOOL_SETTINGS(kokkosp_request_tool_settings) -EXPOSE_INIT(kokkosp_init_library) -EXPOSE_FINALIZE(kokkosp_finalize_library) -EXPOSE_PUSH_REGION(kokkosp_push_profile_region) -EXPOSE_POP_REGION(kokkosp_pop_profile_region) -EXPOSE_BEGIN_PARALLEL_FOR(kokkosp_begin_parallel_for) -EXPOSE_END_PARALLEL_FOR(kokkosp_end_parallel_for) -EXPOSE_BEGIN_PARALLEL_SCAN(kokkosp_begin_parallel_scan) -EXPOSE_END_PARALLEL_SCAN(kokkosp_end_parallel_scan) -EXPOSE_BEGIN_PARALLEL_REDUCE(kokkosp_begin_parallel_reduce) -EXPOSE_END_PARALLEL_REDUCE(kokkosp_end_parallel_reduce) +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) +EXPOSE_INIT(impl::kokkosp_init_library) +EXPOSE_FINALIZE(impl::kokkosp_finalize_library) +EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) +EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) +EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) +EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) +EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) +EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) } // end extern "C" From 4e5c4a418339f7cd3870cb05d4e851bf46a0916b Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 20 Jul 2023 16:22:52 -0700 Subject: [PATCH 09/22] Update kp_sampler_skip.cpp Fixing to use float rather than int for sampling probability --- common/kokkos-sampler/kp_sampler_skip.cpp | 52 ++++++++++++----------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 0f04adda8..208ae5b70 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -5,14 +5,13 @@ #include #include "../../profiling/all/kp_core.hpp" #include "kp_config.hpp" -#include -#include +#include // for random number generation namespace KokkosTools { namespace Sampler { static uint64_t uniqID = 0; -static uint64_t kernelSampleSkip = 101; -static int tool_prob_num = 100; +static uint64_t kernelSampleSkip = 101; // Default skip rate of every 100 invocations +static float tool_prob_num = 1.0; // Default probability of 1 percent of all invocations static int tool_verbosity = 0; static int tool_globFence = 0; @@ -58,11 +57,11 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); if (NULL == profileLibrary) { printf( - "Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a depreciated " - "variable. Please use KOKKOS_TOOLS_LIBS\n"); + "KokkosP: Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a deprecated " + "variable. Please use KOKKOS_TOOLS_LIBS. \n"); profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); if (NULL == profileLibrary) { - printf("KokkosP: No library to call in %s\n", profileLibrary); + printf("KokkosP: No library to call in %s.\n", profileLibrary); exit(-1); } } @@ -148,19 +147,22 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, } if (NULL != tool_probability) { - // read sampling probability as an integer between 1 and 100, but - // programs reasons about probability as a double between 0.0 and 1.0. - tool_prob_num = atoi(tool_probability); - if (tool_prob_num > 100) { + // read sampling probability as an float between 0 and 100, representing + // a percentage that data should be gathered. + // Connector reasons about probability as a double between 0.0 and 1.0. + tool_prob_num = atof(tool_probability); + if (tool_prob_num > 100.0) { printf( - "KokkosP: Tool sample probability was set to be greater than 100. " - "Setting to 100.\n"); - tool_prob_num = 100; - } else if (tool_prob_num < 0) { + "KokkosP: The sampling probability value is set to be greater than 100.0. " + "Setting sampling probability to 100 percent; all of the " + "invocations of a Kokkos Kernel will be profiled.\n"); + tool_prob_num = 100.0; + } else if (tool_prob_num < 0.0) { printf( - "KokkosP: Tool sample probability was set to be less than 0. Setting " - "to 0.\n"); - tool_prob_num = 0; + "KokkosP: The sampling probability value is set to be negative number. Setting " + "sampling probability to 0 percent; none of the invocations of " + "a Kokkos Kernel will be profiled.\n"); + tool_prob_num = 0.0; } } // srand48((unsigned)clock()); @@ -177,9 +179,11 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, if ((NULL != tool_probability) && (NULL != tool_sample)) { printf( - "KokkosP: Note that both probability and skip rate are set. Kokkos " - "Tools Sampler utility will invoke Kokkos Tool child event with a " - "probability at the skip rate.\n"); + "KokkosP: Note that both probability and skip rate are set. The Kokkos " + "Tools Sampler utility will invoke a Kokkos Tool child event you specified " + "(e.g., the profiler or debugger tool connector you specified " + "in KOKKOS_TOOLS_LIBS) with the specified sampling probability applied to the " + "specified sampling skip rate set.\n"); } } // end kokkosp_init_library @@ -193,7 +197,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand() % 100) < tool_prob_num) { + if ((rand()/RAND_MAX) < (tool_prob_num/100.0)) { *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { printf( @@ -229,7 +233,7 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand() % 100) < tool_prob_num) { + if ((rand()/RAND_MAX) < (tool_prob_num/100.0)) { *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { printf( @@ -264,7 +268,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand() % 100) < tool_prob_num) { + if ((rand()/RAND_MAX) < tool_prob_num/100.0) { if (tool_verbosity > 0) { printf( "KokkosP: sample %llu (a parallel_reduce on its invocation number " From d7aa5bc123e9bf9c3ff109ffac73c2937e708867 Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Thu, 20 Jul 2023 18:14:22 -0700 Subject: [PATCH 10/22] Applied clang-format-8 --- common/kokkos-sampler/kp_sampler_skip.cpp | 37 ++++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 208ae5b70..4f6aa411a 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -5,15 +5,17 @@ #include #include "../../profiling/all/kp_core.hpp" #include "kp_config.hpp" -#include // for random number generation +#include // for random number generation namespace KokkosTools { namespace Sampler { -static uint64_t uniqID = 0; -static uint64_t kernelSampleSkip = 101; // Default skip rate of every 100 invocations -static float tool_prob_num = 1.0; // Default probability of 1 percent of all invocations -static int tool_verbosity = 0; -static int tool_globFence = 0; +static uint64_t uniqID = 0; +static uint64_t kernelSampleSkip = + 101; // Default skip rate of every 100 invocations +static float tool_prob_num = + 1.0; // Default probability of 1 percent of all invocations +static int tool_verbosity = 0; +static int tool_globFence = 0; typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*); typedef void (*finalizeFunction)(); @@ -57,7 +59,8 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); if (NULL == profileLibrary) { printf( - "KokkosP: Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a deprecated " + "KokkosP: Checking KOKKOS_PROFILE_LIBRARY. WARNING: This is a " + "deprecated " "variable. Please use KOKKOS_TOOLS_LIBS. \n"); profileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); if (NULL == profileLibrary) { @@ -153,13 +156,15 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_prob_num = atof(tool_probability); if (tool_prob_num > 100.0) { printf( - "KokkosP: The sampling probability value is set to be greater than 100.0. " + "KokkosP: The sampling probability value is set to be greater than " + "100.0. " "Setting sampling probability to 100 percent; all of the " "invocations of a Kokkos Kernel will be profiled.\n"); tool_prob_num = 100.0; } else if (tool_prob_num < 0.0) { printf( - "KokkosP: The sampling probability value is set to be negative number. Setting " + "KokkosP: The sampling probability value is set to be negative " + "number. Setting " "sampling probability to 0 percent; none of the invocations of " "a Kokkos Kernel will be profiled.\n"); tool_prob_num = 0.0; @@ -180,9 +185,11 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, if ((NULL != tool_probability) && (NULL != tool_sample)) { printf( "KokkosP: Note that both probability and skip rate are set. The Kokkos " - "Tools Sampler utility will invoke a Kokkos Tool child event you specified " - "(e.g., the profiler or debugger tool connector you specified " - "in KOKKOS_TOOLS_LIBS) with the specified sampling probability applied to the " + "Tools Sampler utility will invoke a Kokkos Tool child event you " + "specified " + "(e.g., the profiler or debugger tool connector you specified " + "in KOKKOS_TOOLS_LIBS) with the specified sampling probability applied " + "to the " "specified sampling skip rate set.\n"); } } // end kokkosp_init_library @@ -197,7 +204,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand()/RAND_MAX) < (tool_prob_num/100.0)) { + if ((rand() / RAND_MAX) < (tool_prob_num / 100.0)) { *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { printf( @@ -233,7 +240,7 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand()/RAND_MAX) < (tool_prob_num/100.0)) { + if ((rand() / RAND_MAX) < (tool_prob_num / 100.0)) { *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { printf( @@ -268,7 +275,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand()/RAND_MAX) < tool_prob_num/100.0) { + if ((rand() / RAND_MAX) < tool_prob_num / 100.0) { if (tool_verbosity > 0) { printf( "KokkosP: sample %llu (a parallel_reduce on its invocation number " From 325495148c4b20b6a3c1738749172f15f884e1d2 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sun, 30 Jul 2023 20:29:17 -0700 Subject: [PATCH 11/22] committing kp sampler with fix to scan --- common/kokkos-sampler/kp_sampler_skip.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 4f6aa411a..5c160eb7d 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -277,12 +277,13 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, if ((invocationNum % kernelSampleSkip) == 0) { if ((rand() / RAND_MAX) < tool_prob_num / 100.0) { if (tool_verbosity > 0) { - printf( + *kID = 1; + printf( "KokkosP: sample %llu (a parallel_reduce on its invocation number " "%d) calling child-begin function...\n", (unsigned long long)(*kID), (int)invocationNum); } - *kID = 1; + if (NULL != beginReduceCallee) { (*beginReduceCallee)(name, devID, kID); } From f236b87b990a99638f9e2940d18d498f365e779e Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sun, 30 Jul 2023 20:45:28 -0700 Subject: [PATCH 12/22] coommitting delete commented code --- common/kokkos-sampler/kp_sampler_skip.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 5c160eb7d..c4b8165db 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -169,9 +169,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, "a Kokkos Kernel will be profiled.\n"); tool_prob_num = 0.0; } - } - // srand48((unsigned)clock()); - // seed48(0); + }; if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); From c05a65f13f1c346253f65861af6821ed4d217b5e Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Sun, 30 Jul 2023 20:52:56 -0700 Subject: [PATCH 13/22] fix kp sampler skip formatting --- common/kokkos-sampler/kp_sampler_skip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index c4b8165db..1772a411f 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -276,7 +276,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, if ((rand() / RAND_MAX) < tool_prob_num / 100.0) { if (tool_verbosity > 0) { *kID = 1; - printf( + printf( "KokkosP: sample %llu (a parallel_reduce on its invocation number " "%d) calling child-begin function...\n", (unsigned long long)(*kID), (int)invocationNum); From 79ebc63591293850bafb38f20bc54babd14175f4 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Mon, 31 Jul 2023 09:56:49 -0700 Subject: [PATCH 14/22] fix to randomization float conversation and clang-formatting --- common/kokkos-sampler/kp_sampler_skip.cpp | 67 +++++++++++++++++------ 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 1772a411f..3c6e40564 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -176,7 +176,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, printf("KokkosP: Sampling probability set to: %s\n", tool_probability); printf( "KokkosP: seeding Random Number Generator using clock for " - "probabilistic sampling\n"); + "probabilistic sampling.\n"); } srand(time(NULL)); @@ -199,29 +199,41 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { *kID = 0; + static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand() / RAND_MAX) < (tool_prob_num / 100.0)) { - *kID = 1; // set kernel ID to 1 so that it is matched with the end. + if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) { + *kID = 1; + if (tool_verbosity > 0) { printf( - "KokkosP: sample %llu on (a parallel_for on its invocation number " - "%d) calling child-begin function...\n", + "KokkosP: sample %llu of parallel_for on its invocation number %d " + "calling" + "child-begin function...\n", (unsigned long long)(*kID), (int)invocationNum); } if (NULL != beginForCallee) { (*beginForCallee)(name, devID, kID); } + if (tool_verbosity > 1) + printf( + "KokkosP: sample for a parallel_for with kernel ID %llu on its " + "invocation number " + "%d called child-begin function...\n", + (unsigned long long)(*kID), (int)invocationNum); } - } + } // end sampling + } // kokkosp_begin_parallel_for void kokkosp_end_parallel_for(const uint64_t kID) { - if (kID > 0) { + if (kID > + 0) { // the corresponding kokkosp_begin_parallel_for gathered a sample if (tool_verbosity > 0) { printf( - "KokkosP: sample %llu (a parallel_for) calling child-end " + "KokkosP: sample for a parallel_for with kernel ID %llu calling " + "child-end " "function...\n", (unsigned long long)(kID)); } @@ -229,7 +241,9 @@ void kokkosp_end_parallel_for(const uint64_t kID) { if (NULL != endForCallee) { (*endForCallee)(kID); } - } + + } // end kID > 0 + } // kokkosp_end_parallel_for void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, @@ -238,19 +252,27 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand() / RAND_MAX) < (tool_prob_num / 100.0)) { + if (rand() / (1.0 * RAND_MAX) < (tool_prob_num / 100.0)) { *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { printf( - "KokkosP: sample %llu (parallel_scan on its invocation num %d) " - "calling child-begin function...\n", + "KokkosP: sample %llu for parallel_scan on its invocation number " + "%d calling " + "child-begin function...\n", (unsigned long long)(*kID), (int)invocationNum); } if (NULL != beginScanCallee) { (*beginScanCallee)(name, devID, kID); } + if (tool_verbosity > 0) { + printf( + "KokkosP: sample for parallel_scan with kernelID %llu on its " + "invocation number %d " + "called child-begin function...\n", + (unsigned long long)(*kID), (int)invocationNum); + } } - } + } // end sampling } // kokkosp_begin_parallel_scan void kokkosp_end_parallel_scan(const uint64_t kID) { @@ -273,20 +295,29 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { - if ((rand() / RAND_MAX) < tool_prob_num / 100.0) { + if ((rand() / (1.0 * RAND_MAX)) < tool_prob_num / 100.0) { if (tool_verbosity > 0) { - *kID = 1; printf( - "KokkosP: sample %llu (a parallel_reduce on its invocation number " - "%d) calling child-begin function...\n", + "KokkosP: sample %llu for a parallel_reduce on its invocation " + "number " + "%d calling child-begin function...\n", (unsigned long long)(*kID), (int)invocationNum); } + *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (NULL != beginReduceCallee) { (*beginReduceCallee)(name, devID, kID); } + + if (tool_verbosity > 1) { + printf( + "KokkosP: sample for parallel_reduce with kID %llu on its " + "invocation number " + "%d called child-begin function...\n", + (unsigned long long)(*kID), (int)invocationNum); + } } - } + } // end sampling } // kokkosp_begin_parallel_reduce void kokkosp_end_parallel_reduce(const uint64_t kID) { From 75b23d789ed0415c20bb49592979ff7ad2abfbf2 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:09:00 -0700 Subject: [PATCH 15/22] Readme --- common/kokkos-sampler/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/kokkos-sampler/README.md b/common/kokkos-sampler/README.md index 3d0c6393a..d1e4438f7 100644 --- a/common/kokkos-sampler/README.md +++ b/common/kokkos-sampler/README.md @@ -1,4 +1,4 @@ -This is a sampler utility that is intended to complement other tools in the Kokkos Tools set. This utility allows for sampling (rather than collecting) of profiling or debugging data gathered from a particular tool of the Kokkos Tools set. The Kokkos Tools user provides a sampling rate via the environment variable KOKKOS_TOOLS_SAMPLER_SKIP. +This is a sampler utility that is intended to complement other tools in the Kokkos Tools set. This utility allows for sampling (rather than collecting) of profiling or debugging data gathered from a particular tool of the Kokkos Tools set. The Kokkos Tools user provides a sampling rate via the environment variable KOKKOS_TOOLS_SAMPLER_SKIP. It also has an environment variable for sampling probability, KOKKOS_TOOLS_SAMPLER_PROBABILITY that is a percent, taken as a float, between 0.0 and 100.0. In order for the state of the sampled profiling and logging data in memory to be captured at the time of the utility's callback invocation, it might be important to enforce fences. However, this also means that there are more synchronization points compared with running the program without the tool. This fencing behavior can be controlled by setting the environment variable `KOKKOS_TOOLS_GLOBALFENCES`. A non-zero value implies global fences on invocation of the tool. The default is not to introduce extra fences. From cf105ff5f7afc968aebbea01f8d92b2bbcb451ff Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:44:36 -0700 Subject: [PATCH 16/22] Fix tool glob fence to bool --- common/kokkos-sampler/kp_sampler_skip.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 3c6e40564..ff543dbd4 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -15,7 +15,7 @@ static uint64_t kernelSampleSkip = static float tool_prob_num = 1.0; // Default probability of 1 percent of all invocations static int tool_verbosity = 0; -static int tool_globFence = 0; +static bool tool_globFence = 0; typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*); typedef void (*finalizeFunction)(); @@ -33,10 +33,11 @@ static endFunction endReduceCallee = NULL; void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { - if (0 == tool_globFence) { - settings->requires_global_fencing = false; - } else { + settings->requires_global_fencing = true; + if (tool_globFence) { settings->requires_global_fencing = true; + } else { + settings->requires_global_fencing = false; } } @@ -51,9 +52,9 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_verbosity = 0; } if (NULL != tool_globFence_str) { - tool_globFence = atoi(tool_globFence_str); + tool_globFence = (atoi(tool_global_fences) != 0); } else { - tool_globFence = 0; + tool_globFence = false; } char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); From a9cb70673ba9e29281f040988c9b9040ffbe631e Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:55:02 -0700 Subject: [PATCH 17/22] set new defaults of tool prob num and kernelSampleSkip maximum uInt64_t for kernelSampleSkip and -1.0 for tool prob num --- common/kokkos-sampler/kp_sampler_skip.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index ff543dbd4..bf6d1a181 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -11,11 +11,11 @@ namespace KokkosTools { namespace Sampler { static uint64_t uniqID = 0; static uint64_t kernelSampleSkip = - 101; // Default skip rate of every 100 invocations + std::numeric_limits::max; // Default skip rate to max static float tool_prob_num = - 1.0; // Default probability of 1 percent of all invocations + -1.0; // Default probability of undefined percent of all invocations static int tool_verbosity = 0; -static bool tool_globFence = 0; +static int tool_globFence = 0; typedef void (*initFunction)(const int, const uint64_t, const uint32_t, void*); typedef void (*finalizeFunction)(); @@ -33,11 +33,10 @@ static endFunction endReduceCallee = NULL; void kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) { - settings->requires_global_fencing = true; - if (tool_globFence) { - settings->requires_global_fencing = true; + if (0 == tool_globFence) { + settings->requires_global_fencing = 0; } else { - settings->requires_global_fencing = false; + settings->requires_global_fencing = 1; } } @@ -52,9 +51,9 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_verbosity = 0; } if (NULL != tool_globFence_str) { - tool_globFence = (atoi(tool_global_fences) != 0); + tool_globFence = atoi(tool_global_fences); } else { - tool_globFence = false; + tool_globFence = 0; } char* profileLibrary = getenv("KOKKOS_TOOLS_LIBS"); @@ -170,7 +169,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, "a Kokkos Kernel will be profiled.\n"); tool_prob_num = 0.0; } - }; + } if (tool_verbosity > 0) { printf("KokkosP: Sampling rate set to: %s\n", tool_sample); From 5db6e8c505fd3893c2895881ab6fdef5f56fd863 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Wed, 6 Sep 2023 16:20:04 -0700 Subject: [PATCH 18/22] Error check and handle case when both skip rate and probability set In this case, only use the probability set Note: an alternative is to gracefully exit. Feedback welcome here. --- common/kokkos-sampler/kp_sampler_skip.cpp | 34 +++++++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index bf6d1a181..4b3754099 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -170,10 +170,27 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_prob_num = 0.0; } } - + if ((tool_prob_num < 0.0 ) && (kernelSampleSkip == std::numeric_limits::max)) { + if (tool_verbosity > 0) { + printf("KokkosP: Neither sampling utility's probability for sampling " + "nor sampling utility's skip rate were set. \n"); + } + tool_prob_num = 10.0; + if (tool_verbosity > 0) { + printf("KokkosP: Set the sampling utility's probability " + "for sampling to be %f percent. Sampler's skip rate " + "will not be used.\n", tool_prob_num); + } + } + if (tool_verbosity > 0) { - printf("KokkosP: Sampling rate set to: %s\n", tool_sample); - printf("KokkosP: Sampling probability set to: %s\n", tool_probability); + if (tool_verbosity > 1) + { + printf("KokkosP: Sampling rate provided as input: %s\n", tool_sample); + printf("KokkosP: Sampling probability provided as input: %s\n", tool_probability); + } + printf("KokkosP: Sampling rate set to: %llu\n", kernelSampleSkip); + printf("KokkosP: Sampling probability set to %f\n", tool_prob_num); printf( "KokkosP: seeding Random Number Generator using clock for " "probabilistic sampling.\n"); @@ -186,10 +203,15 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, "Tools Sampler utility will invoke a Kokkos Tool child event you " "specified " "(e.g., the profiler or debugger tool connector you specified " - "in KOKKOS_TOOLS_LIBS) with the specified sampling probability applied " - "to the " - "specified sampling skip rate set.\n"); + "in KOKKOS_TOOLS_LIBS) with only specified sampling probability applied " + "and sampling skip rate set is ignored with no " + "predefined periodicity for sampling used.\n"); + } + if (tool_verbosity > 0) { + printf("KokkosP: The skip rate in the sampler utility " + "is being set to 1.\n"); } + kernelSampleSkip = 1; } // end kokkosp_init_library void kokkosp_finalize_library() { From 278bfe008edd57e86714a09416924734aec0ece7 Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 7 Sep 2023 09:22:15 -0700 Subject: [PATCH 19/22] fixing the sampler's minimum skip rate so it is zero in order fix error of pointer to uint64_t comparison --- common/kokkos-sampler/kp_sampler_skip.cpp | 91 ++++++++++++----------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 4b3754099..592c4ba51 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -6,12 +6,12 @@ #include "../../profiling/all/kp_core.hpp" #include "kp_config.hpp" #include // for random number generation +#include namespace KokkosTools { namespace Sampler { -static uint64_t uniqID = 0; -static uint64_t kernelSampleSkip = - std::numeric_limits::max; // Default skip rate to max +static uint64_t uniqID = 0; +static int64_t kernelSampleSkip = 0; static float tool_prob_num = -1.0; // Default probability of undefined percent of all invocations static int tool_verbosity = 0; @@ -44,6 +44,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); + kernelSampleSkip = 0; // use min for undefined skip rate if (NULL != tool_verbose_str) { tool_verbosity = atoi(tool_verbose_str); @@ -51,7 +52,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_verbosity = 0; } if (NULL != tool_globFence_str) { - tool_globFence = atoi(tool_global_fences); + tool_globFence = atoi(tool_globFence_str); } else { tool_globFence = 0; } @@ -170,26 +171,30 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_prob_num = 0.0; } } - if ((tool_prob_num < 0.0 ) && (kernelSampleSkip == std::numeric_limits::max)) { - if (tool_verbosity > 0) { - printf("KokkosP: Neither sampling utility's probability for sampling " - "nor sampling utility's skip rate were set. \n"); - } - tool_prob_num = 10.0; - if (tool_verbosity > 0) { - printf("KokkosP: Set the sampling utility's probability " - "for sampling to be %f percent. Sampler's skip rate " - "will not be used.\n", tool_prob_num); - } + if ((tool_prob_num < 0.0) && (kernelSampleSkip == 0)) { + if (tool_verbosity > 0) { + printf( + "KokkosP: Neither sampling utility's probability for sampling " + "nor sampling utility's skip rate were set. \n"); + } + tool_prob_num = 10.0; + if (tool_verbosity > 0) { + printf( + "KokkosP: Set the sampling utility's probability " + "for sampling to be %f percent. Sampler's skip rate " + "will not be used.\n", + tool_prob_num); + } } - + if (tool_verbosity > 0) { - if (tool_verbosity > 1) - { + if (tool_verbosity > 1) { printf("KokkosP: Sampling rate provided as input: %s\n", tool_sample); - printf("KokkosP: Sampling probability provided as input: %s\n", tool_probability); + printf("KokkosP: Sampling probability provided as input: %s\n", + tool_probability); } - printf("KokkosP: Sampling rate set to: %llu\n", kernelSampleSkip); + printf("KokkosP: Sampling rate set to: %llu\n", + (unsigned long long)(kernelSampleSkip)); printf("KokkosP: Sampling probability set to %f\n", tool_prob_num); printf( "KokkosP: seeding Random Number Generator using clock for " @@ -203,15 +208,17 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, "Tools Sampler utility will invoke a Kokkos Tool child event you " "specified " "(e.g., the profiler or debugger tool connector you specified " - "in KOKKOS_TOOLS_LIBS) with only specified sampling probability applied " + "in KOKKOS_TOOLS_LIBS) with only specified sampling probability " + "applied " "and sampling skip rate set is ignored with no " "predefined periodicity for sampling used.\n"); } if (tool_verbosity > 0) { - printf("KokkosP: The skip rate in the sampler utility " - "is being set to 1.\n"); + printf( + "KokkosP: The skip rate in the sampler utility " + "is being set to 1.\n"); } - kernelSampleSkip = 1; + kernelSampleSkip = 1; } // end kokkosp_init_library void kokkosp_finalize_library() { @@ -220,14 +227,11 @@ void kokkosp_finalize_library() { void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = 0; - + *kID = uniqID++; static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { if ((rand() / (1.0 * RAND_MAX)) < (tool_prob_num / 100.0)) { - *kID = 1; - if (tool_verbosity > 0) { printf( "KokkosP: sample %llu of parallel_for on its invocation number %d " @@ -235,8 +239,10 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, "child-begin function...\n", (unsigned long long)(*kID), (int)invocationNum); } + uint64_t* nestedkID; + *nestedkID = 0; if (NULL != beginForCallee) { - (*beginForCallee)(name, devID, kID); + (*beginForCallee)(name, devID, nestedkID); } if (tool_verbosity > 1) printf( @@ -250,8 +256,8 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, } // kokkosp_begin_parallel_for void kokkosp_end_parallel_for(const uint64_t kID) { - if (kID > - 0) { // the corresponding kokkosp_begin_parallel_for gathered a sample + // match the corresponding kokkosp_begin_parallel_for gathered a sample + if (kID == uniqID) { if (tool_verbosity > 0) { printf( "KokkosP: sample for a parallel_for with kernel ID %llu calling " @@ -263,19 +269,16 @@ void kokkosp_end_parallel_for(const uint64_t kID) { if (NULL != endForCallee) { (*endForCallee)(kID); } - - } // end kID > 0 - + } } // kokkosp_end_parallel_for void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = 0; + *kID = uniqID++; // set kernel ID to a uniqID to match it static uint64_t invocationNum; ++invocationNum; if ((invocationNum % kernelSampleSkip) == 0) { if (rand() / (1.0 * RAND_MAX) < (tool_prob_num / 100.0)) { - *kID = 1; // set kernel ID to 1 so that it is matched with the end. if (tool_verbosity > 0) { printf( "KokkosP: sample %llu for parallel_scan on its invocation number " @@ -283,8 +286,10 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, "child-begin function...\n", (unsigned long long)(*kID), (int)invocationNum); } + uint64_t* nestedkID; + *nestedkID = 0; if (NULL != beginScanCallee) { - (*beginScanCallee)(name, devID, kID); + (*beginScanCallee)(name, devID, nestedkID); } if (tool_verbosity > 0) { printf( @@ -298,7 +303,8 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, } // kokkosp_begin_parallel_scan void kokkosp_end_parallel_scan(const uint64_t kID) { - if (kID > 0) { + // match corresponding scan with kernel ID kID + if (kID == uniqID) { if (tool_verbosity > 0) { printf( "KokkosP: sample %llu (a parallel_scan) calling child-end " @@ -313,9 +319,9 @@ void kokkosp_end_parallel_scan(const uint64_t kID) { void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { - *kID = 0; static uint64_t invocationNum; ++invocationNum; + *kID = uniqID++; if ((invocationNum % kernelSampleSkip) == 0) { if ((rand() / (1.0 * RAND_MAX)) < tool_prob_num / 100.0) { if (tool_verbosity > 0) { @@ -326,9 +332,10 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, (unsigned long long)(*kID), (int)invocationNum); } - *kID = 1; // set kernel ID to 1 so that it is matched with the end. + uint64_t* nestedkID; // set nested kernel ID + *nestedkID = 0; if (NULL != beginReduceCallee) { - (*beginReduceCallee)(name, devID, kID); + (*beginReduceCallee)(name, devID, nestedkID); } if (tool_verbosity > 1) { @@ -343,7 +350,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, } // kokkosp_begin_parallel_reduce void kokkosp_end_parallel_reduce(const uint64_t kID) { - if (kID > 0) { + if (kID == uniqID) { if (tool_verbosity > 0) { printf( "KokkosP: sample %llu (a parallel_reduce) calling child-end " From 687bdd27342942cb0782ce043e1bcbf49b1617da Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 7 Sep 2023 09:38:47 -0700 Subject: [PATCH 20/22] fixing the sampler's nestedkID init in parallel reduce --- common/kokkos-sampler/kp_sampler_skip.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index 592c4ba51..aa2c6fa48 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -240,7 +240,7 @@ void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, (unsigned long long)(*kID), (int)invocationNum); } uint64_t* nestedkID; - *nestedkID = 0; + *nestedkID = 1; if (NULL != beginForCallee) { (*beginForCallee)(name, devID, nestedkID); } @@ -287,7 +287,7 @@ void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, (unsigned long long)(*kID), (int)invocationNum); } uint64_t* nestedkID; - *nestedkID = 0; + *nestedkID = 1; if (NULL != beginScanCallee) { (*beginScanCallee)(name, devID, nestedkID); } @@ -333,7 +333,7 @@ void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, } uint64_t* nestedkID; // set nested kernel ID - *nestedkID = 0; + *nestedkID = 1; if (NULL != beginReduceCallee) { (*beginReduceCallee)(name, devID, nestedkID); } From b6060989492983451f3de0961db73b7c729049bb Mon Sep 17 00:00:00 2001 From: Vivek Kale Date: Thu, 7 Sep 2023 10:20:10 -0700 Subject: [PATCH 21/22] putting numeric limits for kernelSampleSkip back in --- common/kokkos-sampler/Makefile | 2 +- common/kokkos-sampler/kp_sampler_skip.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/common/kokkos-sampler/Makefile b/common/kokkos-sampler/Makefile index 862cae8fa..ce5f56ca1 100644 --- a/common/kokkos-sampler/Makefile +++ b/common/kokkos-sampler/Makefile @@ -1,4 +1,4 @@ -CXX = clang++ +CXX = g++ CXXFLAGS = -O3 -std=c++17 -g diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index aa2c6fa48..b5576a2ca 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -11,7 +11,7 @@ namespace KokkosTools { namespace Sampler { static uint64_t uniqID = 0; -static int64_t kernelSampleSkip = 0; +static int64_t kernelSampleSkip = std::numeric_limits::max(); static float tool_prob_num = -1.0; // Default probability of undefined percent of all invocations static int tool_verbosity = 0; @@ -44,7 +44,6 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void* deviceInfo) { const char* tool_verbose_str = getenv("KOKKOS_TOOLS_SAMPLER_VERBOSE"); const char* tool_globFence_str = getenv("KOKKOS_TOOLS_GLOBALFENCES"); - kernelSampleSkip = 0; // use min for undefined skip rate if (NULL != tool_verbose_str) { tool_verbosity = atoi(tool_verbose_str); @@ -171,7 +170,7 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_prob_num = 0.0; } } - if ((tool_prob_num < 0.0) && (kernelSampleSkip == 0)) { + if ((tool_prob_num < 0.0) && (kernelSampleSkip == std::numeric_limits::max())) { if (tool_verbosity > 0) { printf( "KokkosP: Neither sampling utility's probability for sampling " From 82b2423a30fd72d3f697ac789217ead4ae9aa66d Mon Sep 17 00:00:00 2001 From: Vivek Kale <11766050+vlkale@users.noreply.github.com> Date: Thu, 7 Sep 2023 10:23:58 -0700 Subject: [PATCH 22/22] applied clang format --- common/kokkos-sampler/kp_sampler_skip.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/kokkos-sampler/kp_sampler_skip.cpp b/common/kokkos-sampler/kp_sampler_skip.cpp index b5576a2ca..64a819909 100644 --- a/common/kokkos-sampler/kp_sampler_skip.cpp +++ b/common/kokkos-sampler/kp_sampler_skip.cpp @@ -170,7 +170,8 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, tool_prob_num = 0.0; } } - if ((tool_prob_num < 0.0) && (kernelSampleSkip == std::numeric_limits::max())) { + if ((tool_prob_num < 0.0) && + (kernelSampleSkip == std::numeric_limits::max())) { if (tool_verbosity > 0) { printf( "KokkosP: Neither sampling utility's probability for sampling "