diff --git a/CMakeLists.txt b/CMakeLists.txt index bf815472c..428402c1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ endif() include(cmake/configure_variorum.cmake) set(KOKKOSTOOLS_HAS_CALIPER ${KokkosTools_ENABLE_CALIPER}) -set(KOKKOSTOOLS_HAS_NVPROF ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvprof should be available +set(KOKKOSTOOLS_HAS_NVTX ${Kokkos_ENABLE_CUDA}) # we assume that enabling CUDA for Kokkos program means nvtx should be available if(DEFINED ENV{VTUNE_HOME}) set(VTune_ROOT $ENV{VTUNE_HOME}) @@ -180,7 +180,7 @@ endif() # GPU profilers if(Kokkos_ENABLE_CUDA) add_subdirectory(profiling/nvtx-connector) - add_subdirectory(profiling/nvprof-focused-connector) + add_subdirectory(profiling/nvtx-focused-connector) endif() if(Kokkos_ENABLE_HIP) add_subdirectory(profiling/roctx-connector) diff --git a/common/kp_config.hpp.in b/common/kp_config.hpp.in index 01d36cd86..77c160870 100644 --- a/common/kp_config.hpp.in +++ b/common/kp_config.hpp.in @@ -2,7 +2,7 @@ #define USE_MPI @KOKKOSTOOLS_HAS_MPI@ -#cmakedefine KOKKOSTOOLS_HAS_NVPROF +#cmakedefine KOKKOSTOOLS_HAS_NVTX #cmakedefine KOKKOSTOOLS_HAS_CALIPER #cmakedefine KOKKOSTOOLS_HAS_SYSTEMTAP #cmakedefine KOKKOSTOOLS_HAS_VARIORUM diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 98bbae643..e7490dbcb 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -44,7 +44,7 @@ if(KOKKOSTOOLS_HAS_VTUNE) add_kp_test(vtune_connector "vtune-connector") add_kp_test(vtune_focused_connector "vtune-focused-connector") endif() -if(KOKKOSTOOLS_HAS_NVPROF) - add_kp_test(vtune_connector "nvprof-connector") - add_kp_test(vtune_focused_connector "nvprof-focused-connector") +if(KOKKOSTOOLS_HAS_NVTX) + add_kp_test(nvtx_connector "nvtx-connector") + add_kp_test(nvtx_focused_connector "nvtx-focused-connector") endif() diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index a1c9683a6..67419b039 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -48,9 +48,9 @@ KOKKOSTOOLS_EXTERN_EVENT_SET(VTuneFocusedConnector) #ifdef KOKKOSTOOLS_HAS_VARIORUM KOKKOSTOOLS_EXTERN_EVENT_SET(VariorumConnector) #endif -#ifdef KOKKOSTOOLS_HAS_NVPROF +#ifdef KOKKOSTOOLS_HAS_NVTX KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXConnector) -KOKKOSTOOLS_EXTERN_EVENT_SET(NVProfFocusedConnector) +KOKKOSTOOLS_EXTERN_EVENT_SET(NVTXFocusedConnector) #endif #ifdef KOKKOSTOOLS_HAS_CALIPER namespace cali { @@ -90,10 +90,9 @@ EventSet get_event_set(const char* profiler, const char* config_str) { #ifdef KOKKOSTOOLS_HAS_CALIPER handlers["caliper"] = cali::get_kokkos_event_set(config_str); #endif -#ifdef KOKKOSTOOLS_HAS_NVPROF - handlers["nvtx-connector"] = NVTXConnector::get_event_set(); - handlers["nvprof-focused-connector"] = - NVProfFocusedConnector::get_event_set(); +#ifdef KOKKOSTOOLS_HAS_NVTX + handlers["nvtx-connector"] = NVTXConnector::get_event_set(); + handlers["nvtx-focused-connector"] = NVTXFocusedConnector::get_event_set(); #endif auto e = handlers.find(profiler); if (e != handlers.end()) return e->second; diff --git a/profiling/nvprof-focused-connector/CMakeLists.txt b/profiling/nvprof-focused-connector/CMakeLists.txt deleted file mode 100644 index 072198bf5..000000000 --- a/profiling/nvprof-focused-connector/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -find_package(CUDAToolkit REQUIRED) -kp_add_library(kp_nvprof_focused_connector kp_nvprof_focused_connector.cpp) - -target_link_libraries(kp_nvprof_focused_connector CUDA::nvToolsExt) \ No newline at end of file diff --git a/profiling/nvtx-focused-connector/CMakeLists.txt b/profiling/nvtx-focused-connector/CMakeLists.txt new file mode 100644 index 000000000..e75e93469 --- /dev/null +++ b/profiling/nvtx-focused-connector/CMakeLists.txt @@ -0,0 +1,4 @@ +find_package(CUDAToolkit REQUIRED) +kp_add_library(kp_nvtx_focused_connector kp_nvtx_focused_connector.cpp) + +target_link_libraries(kp_nvtx_focused_connector CUDA::nvToolsExt) diff --git a/profiling/nvprof-focused-connector/Makefile b/profiling/nvtx-focused-connector/Makefile similarity index 51% rename from profiling/nvprof-focused-connector/Makefile rename to profiling/nvtx-focused-connector/Makefile index 06628279d..5c33818ae 100644 --- a/profiling/nvprof-focused-connector/Makefile +++ b/profiling/nvtx-focused-connector/Makefile @@ -4,15 +4,15 @@ LDFLAGS=-L$(CUDA_ROOT)/lib64 LIBS=-lnvToolsExt -lcudart SHARED_CXXFLAGS=-shared -fPIC -all: kp_nvprof_focused_connector.so +all: kp_nvtx_focused_connector.so MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) -CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}/../../common/makefile-only -I${MAKEFILE_PATH}../all +CXXFLAGS+=-I${MAKEFILE_PATH} -I${MAKEFILE_PATH}../../common/makefile-only -I${MAKEFILE_PATH}../all -kp_nvprof_focused_connector.so: ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp +kp_nvtx_focused_connector.so: ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) $(LDFLAGS) \ - -o $@ ${MAKEFILE_PATH}kp_nvprof_focused_connector.cpp $(LIBS) + -o $@ ${MAKEFILE_PATH}kp_nvtx_focused_connector.cpp $(LIBS) clean: rm *.so diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp similarity index 68% rename from profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp rename to profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp index 9d971db76..b4a434d36 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector.cpp +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector.cpp @@ -23,15 +23,28 @@ #include #include -#include "kp_nvprof_focused_connector_domain.h" +#include "kp_nvtx_focused_connector_domain.h" + +#include "nvToolsExt.h" #include "kp_core.hpp" +static bool tool_globfences; namespace KokkosTools { -namespace NVProfFocusedConnector { +namespace NVTXFocusedConnector { -static KernelNVProfFocusedConnectorInfo* currentKernel; -static std::unordered_map +void kokkosp_request_tool_settings(const uint32_t, + Kokkos_Tools_ToolSettings* settings) { + settings->requires_global_fencing = true; + if (tool_globfences) { + settings->requires_global_fencing = true; + } else { + settings->requires_global_fencing = false; + } +} // end request tool settings + +static KernelNVTXFocusedConnectorInfo* currentKernel; +static std::unordered_map domain_map; static uint64_t nextKernelID; @@ -41,52 +54,61 @@ void kokkosp_init_library( struct Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { printf("-----------------------------------------------------------\n"); printf( - "KokkosP: NVProf Analyzer Focused Connector (sequence is %d, version: " + "KokkosP: NVTX Analyzer Focused Connector (sequence is %d, version: " "%llu)\n", loadSeq, (unsigned long long)(interfaceVer)); printf("-----------------------------------------------------------\n"); - + const char* tool_global_fences = getenv("KOKKOS_TOOLS_GLOBALFENCES"); + if (NULL != tool_global_fences) { + tool_globfences = + (atoi(tool_global_fences) != 0); // if user sets to 0, no global fences + } else { + tool_globfences = + true; // default to true to be conservative for capturing state by tool + } + nvtxNameOsThread(pthread_self(), "Application Main Thread"); + nvtxMarkA("Kokkos::Initialization Complete"); nextKernelID = 0; -} +} // end kokkosp_init_library -KernelNVProfFocusedConnectorInfo* getFocusedConnectorInfo( +KernelNVTXFocusedConnectorInfo* getFocusedConnectorInfo( const char* name, KernelExecutionType kType) { std::string nameStr(name); auto kDomain = domain_map.find(nameStr); currentKernel = NULL; if (kDomain == domain_map.end()) { - currentKernel = new KernelNVProfFocusedConnectorInfo(name, kType); - domain_map.insert(std::pair( + currentKernel = new KernelNVTXFocusedConnectorInfo(name, kType); + domain_map.insert(std::pair( nameStr, currentKernel)); } else { currentKernel = kDomain->second; } return currentKernel; -} +} // end getFocusedConnectorInfo void focusedConnectorExecuteStart() { cudaProfilerStart(); currentKernel->startRange(); -} +} // end focusedConnectorExecuteStart void focusedConnectorExecuteEnd() { currentKernel->endRange(); cudaProfilerStop(); currentKernel = NULL; -} +} // end focusedConnectorExecuteEnd void kokkosp_finalize_library() { printf("-----------------------------------------------------------\n"); - printf("KokkosP: Finalization of NVProf Connector. Complete.\n"); + printf( + "KokkosP: Finalization of NVTX Analyzer Focused Connector. Complete.\n"); printf("-----------------------------------------------------------\n"); -} +} // end kokkosp_finalize_library void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; - + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_FOR); focusedConnectorExecuteStart(); } @@ -97,8 +119,7 @@ void kokkosp_end_parallel_for(const uint64_t /*kID*/) { void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; - + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_SCAN); focusedConnectorExecuteStart(); } @@ -109,8 +130,7 @@ void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, uint64_t* kID) { - *kID = nextKernelID++; - + *kID = nextKernelID++; currentKernel = getFocusedConnectorInfo(name, PARALLEL_REDUCE); focusedConnectorExecuteStart(); } @@ -123,6 +143,7 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { Kokkos::Tools::Experimental::EventSet my_event_set; memset(&my_event_set, 0, sizeof(my_event_set)); // zero any pointers not set here + my_event_set.request_tool_settings = kokkosp_request_tool_settings; my_event_set.init = kokkosp_init_library; my_event_set.finalize = kokkosp_finalize_library; my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; @@ -132,15 +153,14 @@ Kokkos::Tools::Experimental::EventSet get_event_set() { my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; return my_event_set; -} +} // end get_event_set -} // namespace NVProfFocusedConnector +} // namespace NVTXFocusedConnector } // namespace KokkosTools extern "C" { - -namespace impl = KokkosTools::NVProfFocusedConnector; - +namespace impl = KokkosTools::NVTXFocusedConnector; +EXPOSE_TOOL_SETTINGS(impl::kokkosp_request_tool_settings) EXPOSE_INIT(impl::kokkosp_init_library) EXPOSE_FINALIZE(impl::kokkosp_finalize_library) EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) @@ -149,5 +169,4 @@ EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) - } // extern "C" diff --git a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h similarity index 84% rename from profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h rename to profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h index e668a9a3c..d38f60538 100644 --- a/profiling/nvprof-focused-connector/kp_nvprof_focused_connector_domain.h +++ b/profiling/nvtx-focused-connector/kp_nvtx_focused_connector_domain.h @@ -14,8 +14,8 @@ // //@HEADER -#ifndef _H_KOKKOSP_KERNEL_NVPROF_CONNECTOR_INFO -#define _H_KOKKOSP_KERNEL_NVPROF_CONNECTOR_INFO +#ifndef KOKKOSP_KERNEL_NVTX_CONNECTOR_H +#define KOKKOSP_KERNEL_NVTX_CONNECTOR_H #include #include @@ -24,7 +24,7 @@ #include "nvToolsExt.h" namespace KokkosTools { -namespace NVProfFocusedConnector { +namespace NVTXFocusedConnector { enum KernelExecutionType { PARALLEL_FOR = 0, @@ -32,10 +32,10 @@ enum KernelExecutionType { PARALLEL_SCAN = 2 }; -class KernelNVProfFocusedConnectorInfo { +class KernelNVTXFocusedConnectorInfo { public: - KernelNVProfFocusedConnectorInfo(std::string kName, - KernelExecutionType kernelType) { + KernelNVTXFocusedConnectorInfo(std::string kName, + KernelExecutionType kernelType) { domainNameHandle = kName; char* domainName = (char*)malloc(sizeof(char*) * (32 + kName.size())); @@ -71,7 +71,7 @@ class KernelNVProfFocusedConnectorInfo { std::string getDomainNameHandle() { return domainNameHandle; } - ~KernelNVProfFocusedConnectorInfo() { nvtxDomainDestroy(domain); } + ~KernelNVTXFocusedConnectorInfo() { nvtxDomainDestroy(domain); } private: std::string domainNameHandle; @@ -81,4 +81,4 @@ class KernelNVProfFocusedConnectorInfo { #endif } -} // KokkosTools::NVProfFocusedConnector +} // KokkosTools::NVTXFocusedConnector