Skip to content

Commit

Permalink
Better kernel debug support (#691)
Browse files Browse the repository at this point in the history
- Introduce debug singleton object used to hold debug environment
  information during runtime.  This could be a good place to store new
  kinds of debug flags in the future for runtime.
- Wire it up through ttrt.
- loadKernelsFromDisk added to denote reloading of generated kernels
  from /tmp, from previous run, instead of loading them from the
  flatbuffer.
- Name the kernels with program and location info from the MLIR graph.
  Now names look like /tmp/ttmlir_multiply_%5_tensix.cpp
                                  ^        ^  ^
                       Func name -/        |  |
                                           |  |
                         Result Value Loc -/  |
                                              |
         Thread type/info [noc, eth, tensix] -/
  • Loading branch information
nsmithtt authored Sep 16, 2024
1 parent 4fa43c6 commit e891e98
Show file tree
Hide file tree
Showing 11 changed files with 183 additions and 9 deletions.
8 changes: 8 additions & 0 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
option(TTMLIR_ENABLE_RUNTIME_TESTS "Enable runtime tests" OFF)
option(TT_RUNTIME_ENABLE_TTNN "Enable TTNN Runtime" ON)
option(TT_RUNTIME_ENABLE_TTMETAL "Enable TTMetal Runtime" ON)
option(TT_RUNTIME_DEBUG "Enable debug tools in runtime" OFF)

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(TT_RUNTIME_DEBUG ON)
endif()
if (TT_RUNTIME_DEBUG)
add_definitions(-DTT_RUNTIME_DEBUG)
endif()

add_subdirectory(lib)
add_subdirectory(tools)
Expand Down
41 changes: 41 additions & 0 deletions runtime/include/tt/runtime/detail/debug.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#ifndef TT_RUNTIME_DETAIL_DEBUG_H
#define TT_RUNTIME_DETAIL_DEBUG_H

#include <ostream>

namespace tt::runtime::debug {

struct Env {
#if defined(TT_RUNTIME_DEBUG) && TT_RUNTIME_DEBUG == 1
static Env const &
#else
constexpr static Env
#endif
get(bool loadKernelsFromDisk = false)
#if defined(TT_RUNTIME_DEBUG) && TT_RUNTIME_DEBUG == 1
;
#else
{
return Env(false);
}
#endif

bool loadKernelsFromDisk;

private:
constexpr Env(bool loadKernelsFromDisk)
: loadKernelsFromDisk(loadKernelsFromDisk) {}
};

inline std::ostream &operator<<(std::ostream &os, Env const &env) {
os << "Env{loadKernelsFromDisk=" << env.loadKernelsFromDisk << "}";
return os;
}

} // namespace tt::runtime::debug

#endif // TT_RUNTIME_DETAIL_DEBUG_H
4 changes: 3 additions & 1 deletion runtime/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ if (TTMLIR_ENABLE_RUNTIME AND (TT_RUNTIME_ENABLE_TTNN OR TT_RUNTIME_ENABLE_TTMET
add_subdirectory(common)
else()
add_library(TTRuntimeSysDesc INTERFACE)
add_library(TTRuntimeDebug INTERFACE)
endif()

add_library(TTRuntime STATIC runtime.cpp)
Expand All @@ -51,6 +52,7 @@ target_link_libraries(TTRuntime
TTRuntimeSysDesc
TTRuntimeTTNN
TTRuntimeTTMetal
TTRuntimeDebug
)

add_dependencies(TTRuntime TTBinary TTRuntimeSysDesc FBS_GENERATION)
add_dependencies(TTRuntime TTBinary TTRuntimeSysDesc TTRuntimeDebug FBS_GENERATION)
6 changes: 6 additions & 0 deletions runtime/lib/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ target_include_directories(TTRuntimeSysDesc
)
target_include_directories(TTRuntimeSysDesc PUBLIC "$<BUILD_INTERFACE:${TTMETAL_INCLUDE_DIRS}>")
add_dependencies(TTRuntimeSysDesc tt-metal FBS_GENERATION)

add_library(TTRuntimeDebug STATIC debug.cpp)
target_include_directories(TTRuntimeDebug
PUBLIC
${PROJECT_SOURCE_DIR}/runtime/include
)
18 changes: 18 additions & 0 deletions runtime/lib/common/debug.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#include "tt/runtime/detail/debug.h"

#if defined(TT_RUNTIME_DEBUG) && TT_RUNTIME_DEBUG == 1

namespace tt::runtime::debug {

Env const &Env::get(bool loadKernelsFromDisk) {
static Env config(loadKernelsFromDisk);
return config;
}

} // namespace tt::runtime::debug

#endif
82 changes: 76 additions & 6 deletions runtime/lib/ttmetal/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <unordered_map>

#include "tt/runtime/detail/debug.h"
#include "tt/runtime/detail/ttmetal.h"
#include "tt/runtime/runtime.h"
#include "tt/runtime/utils.h"
Expand All @@ -21,6 +22,7 @@ struct CQExecutor {
std::unordered_map<std::uint32_t, std::shared_ptr<::tt::tt_metal::Event>>
events;
::tt::tt_metal::CommandQueue *cq;
char const *currentProgramName;

CQExecutor(::tt::tt_metal::Device *device, std::size_t cq_id,
std::vector<InputBuffer> const &inputs,
Expand All @@ -29,7 +31,8 @@ struct CQExecutor {
std::shared_ptr<::tt::tt_metal::Event>
execute(::tt::target::metal::CommandQueue const *commandQueue);
void execute(::tt::target::metal::Command const *command);
void execute(::tt::target::metal::EnqueueProgramCommand const *command);
void execute(::tt::target::metal::EnqueueProgramCommand const *command,
char const *debugInfo);
void execute(::tt::target::metal::EnqueueWriteBufferCommand const *command);
void execute(::tt::target::metal::EnqueueReadBufferCommand const *command);
void execute(::tt::target::metal::CreateBufferCommand const *command);
Expand Down Expand Up @@ -64,6 +67,8 @@ CQExecutor::CQExecutor(::tt::tt_metal::Device *device, std::size_t cq_id,

std::shared_ptr<::tt::tt_metal::Event>
CQExecutor::execute(::tt::target::metal::CommandQueue const *commandQueue) {
currentProgramName = commandQueue->name()->c_str();

for (auto const &event : initEvents) {
::tt::tt_metal::EnqueueWaitForEvent(*cq, event);
}
Expand All @@ -83,7 +88,8 @@ CQExecutor::execute(::tt::target::metal::CommandQueue const *commandQueue) {
void CQExecutor::execute(::tt::target::metal::Command const *command) {
switch (command->type_type()) {
case ::tt::target::metal::CommandType::EnqueueProgramCommand: {
execute(command->type_as_EnqueueProgramCommand());
execute(command->type_as_EnqueueProgramCommand(),
command->debug_info()->c_str());
break;
}
case ::tt::target::metal::CommandType::EnqueueWriteBufferCommand: {
Expand Down Expand Up @@ -132,8 +138,73 @@ void CQExecutor::execute(::tt::target::metal::Command const *command) {
}
}

static char const *
kernelSourceTypeString(::tt::target::metal::KernelSource const *kernelSource) {
switch (kernelSource->config_type()) {
case ::tt::target::metal::KernelConfig::NONE: {
break;
}
case ::tt::target::metal::KernelConfig::NocConfig: {
switch (kernelSource->config_as_NocConfig()->noc_index()) {
case tt::target::metal::NocIndex::Noc0: {
return "noc0";
}
case tt::target::metal::NocIndex::Noc1: {
return "noc1";
}
}
}
case ::tt::target::metal::KernelConfig::EthernetConfig: {
switch (kernelSource->config_as_EthernetConfig()->eth_type()) {
case tt::target::metal::EthType::Sender: {
return "ethSender";
}
case tt::target::metal::EthType::Receiver: {
return "ethReceiver";
}
}
}
case ::tt::target::metal::KernelConfig::TensixConfig: {
return "tensix";
}
}
return "unknown";
}

static std::string parseLocFromDebugInfo(char const *programDebugInfo) {
if (!programDebugInfo) {
static int gUnknownId = 0;
return std::string("%unknown") + std::to_string(gUnknownId++);
}
std::string debugInfo(programDebugInfo);
std::size_t pos = debugInfo.find_first_of(' ');
if (pos == std::string::npos) {
return debugInfo;
}
return debugInfo.substr(0, pos);
}

static std::string createKernelFilePath(
char const *currentProgramName, char const *programDebugInfo,
::tt::target::metal::KernelSource const *kernelSource,
char const *prefix = "/tmp/ttmlir_", char const *extention = ".cpp") {
std::string path(prefix);
path += currentProgramName;
path += "_";
path += parseLocFromDebugInfo(programDebugInfo);
path += "_";
path += kernelSourceTypeString(kernelSource);
path += extention;
return path;
}

static void writeFile(std::string const &fileName, char const *data,
std::size_t size) {
if (debug::Env::get().loadKernelsFromDisk) {
std::ifstream file(fileName);
assert(file.is_open() && "Kernel file not found");
return;
}
std::ofstream file(fileName);
file.write(data, size);
file.close();
Expand Down Expand Up @@ -313,9 +384,8 @@ static void processRuntimeArgs(
}

void CQExecutor::execute(
::tt::target::metal::EnqueueProgramCommand const *command) {
static int gKernelId = 0;

::tt::target::metal::EnqueueProgramCommand const *command,
char const *debugInfo) {
::tt::tt_metal::Program program = ::tt::tt_metal::CreateProgram();

for (::tt::target::metal::KernelDesc const *kernelDesc :
Expand All @@ -326,7 +396,7 @@ void CQExecutor::execute(
// We need a new API to create a kernel from source string, or directly from
// binary
std::string fileName =
"/tmp/ttmlir_" + std::to_string(gKernelId++) + ".cpp";
createKernelFilePath(currentProgramName, debugInfo, kernelSource);
writeFile(fileName, kernelSource->source()->c_str(),
kernelSource->source()->size());
CoreRangeSet coreRange = toCoreRangeSet(kernelDesc->core_range_set());
Expand Down
1 change: 1 addition & 0 deletions runtime/tools/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_custom_target(ttrt
TT_RUNTIME_ENABLE_TTNN=${TT_RUNTIME_ENABLE_TTNN}
TT_RUNTIME_ENABLE_TTMETAL=${TT_RUNTIME_ENABLE_TTMETAL}
TT_RUNTIME_ENABLE_PERF_TRACE=${TT_RUNTIME_ENABLE_PERF_TRACE}
TT_RUNTIME_DEBUG=${TT_RUNTIME_DEBUG}
TTMLIR_VERSION_MAJOR=${TTMLIR_VERSION_MAJOR}
TTMLIR_VERSION_MINOR=${TTMLIR_VERSION_MINOR}
TTMLIR_VERSION_PATCH=${TTMLIR_VERSION_PATCH}
Expand Down
9 changes: 7 additions & 2 deletions runtime/tools/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
enable_ttnn = os.environ.get("TT_RUNTIME_ENABLE_TTNN", "OFF") == "ON"
enable_ttmetal = os.environ.get("TT_RUNTIME_ENABLE_TTMETAL", "OFF") == "ON"
enable_perf = os.environ.get("TT_RUNTIME_ENABLE_PERF_TRACE", "OFF") == "ON"
debug_runtime = os.environ.get("TT_RUNTIME_DEBUG", "OFF") == "ON"

ext_modules = [
Pybind11Extension(
Expand Down Expand Up @@ -65,6 +66,7 @@

if enable_ttnn or enable_ttmetal:
runlibs += ["libdevice.so", "libnng.so.1", "libuv.so.1"]
linklibs += ["TTRuntimeDebug"]

if enable_perf:
runlibs += ["libtracy.so.0.10.0"]
Expand Down Expand Up @@ -139,7 +141,7 @@

def package_files(directory):
paths = []
for (path, directories, filenames) in os.walk(directory):
for path, directories, filenames in os.walk(directory):
for filename in filenames:
paths.append(os.path.join("..", path, filename))
return paths
Expand Down Expand Up @@ -182,7 +184,10 @@ def package_files(directory):
f"{src_dir}/build/runtime/tools/python/ttrt/runtime",
f"{metaldir}/lib",
],
define_macros=[("VERSION_INFO", __version__)],
define_macros=[
("VERSION_INFO", __version__),
("TT_RUNTIME_DEBUG", "1" if debug_runtime else "0"),
],
)
)

Expand Down
11 changes: 11 additions & 0 deletions runtime/tools/python/ttrt/common/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,14 @@ def initialize_apis():
help="--save-artifacts flag must be set, provides a directory path to save artifacts to",
api_only=False,
)
API.Run.register_arg(
name="--load-kernels-from-disk",
type=bool,
default=False,
choices=[True, False],
help="Pickup the kernels from disk (/tmp) instead of the flatbuffer",
api_only=False,
)

# register all perf arguments
API.Perf.register_arg(
Expand Down Expand Up @@ -838,6 +846,9 @@ def _execute(binaries):
self.logging.warning(f"no binaries found to run - returning early")
return

debug_env = ttrt.runtime.DebugEnv.get(self.load_kernels_from_disk)
self.logging.debug(f"setting tt runtime debug env={debug_env}")

self.logging.debug(f"setting torch manual seed={self['seed']}")
torch.manual_seed(self["seed"])
ttrt.runtime.set_compatible_runtime(binaries[0].fbb)
Expand Down
1 change: 1 addition & 0 deletions runtime/tools/python/ttrt/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Tensor,
DataType,
DeviceRuntime,
DebugEnv,
get_current_runtime,
set_compatible_runtime,
get_current_system_desc,
Expand Down
11 changes: 11 additions & 0 deletions runtime/tools/python/ttrt/runtime/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
//
// SPDX-License-Identifier: Apache-2.0

#include <sstream>

#include "tt/runtime/detail/debug.h"
#include "tt/runtime/runtime.h"

#include <pybind11/pybind11.h>
Expand Down Expand Up @@ -64,4 +67,12 @@ PYBIND11_MODULE(_C, m) {
py::arg("executable"), py::arg("program_index"), py::arg("inputs"),
py::arg("outputs"), "Submit a binary for execution");
m.def("wait", &tt::runtime::wait, py::arg("event"));

py::class_<tt::runtime::debug::Env>(m, "DebugEnv")
.def_static("get", &tt::runtime::debug::Env::get)
.def("__str__", [](const tt::runtime::debug::Env &env) {
std::stringstream os;
os << env;
return os.str();
});
}

0 comments on commit e891e98

Please sign in to comment.