Skip to content

Commit

Permalink
#341: Added support for step debugger and memory dumps after each op …
Browse files Browse the repository at this point in the history
…invocation (#1616)

2 new modes in ttrt (debugger + memory)
```
ttrt run <flatbuffer> --debugger
```
This will start a pdb debugger after every op invocation in runtime.
Further support will be added.


```
ttrt run <flatbuffer> --memory
ttrt perf <flatbuffer> --memory
```



This will dump memory reports after each op invocation in runtime. A
memory_report.json file will be dumped, containing all the ops (an entry
for each) and the dram/l1 memory usage. This is global view of the
board.

```
{
        "loc": "loc(\"/code/tt-mlir/test/python/golden/test_ttir_ops.py:65:id(0)\")",
        "debug_str": "%6 = \"ttnn.add\"(%2, %4, %5) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<2x4x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, <interleaved>>>, tensor<64x128xf32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<2x4x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, <interleaved>>>, tensor<64x128xf32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<2x4x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, <interleaved>>>) -> tensor<64x128xf32, #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<2x4x!tt.tile<32x32, f32>, #ttnn.buffer_type<dram>>, <interleaved>>> loc(\"/code/tt-mlir/test/python/golden/test_ttir_ops.py:65:id(0)\")",
        "dram": {
            "total_allocatable (bytes) : total_allocatable/bank * num_banks": "12884901504",
            "total_allocated (bytes) : total_allocated/bank * num_banks": "3268608",
            "total_free (bytes) : total_allocatable - total_allocated": "12881632896",
            "blocks": [
                {
                    "address (bytes)": "32",
                    "size (bytes)": "90112",
                    "allocated (y/n)": "Y"
                },
                {
                    "address (bytes)": "90144",
                    "size (bytes)": "90112",
                    "allocated (y/n)": "Y"
                },
                {
                    "address (bytes)": "180256",
                    "size (bytes)": "4096",
                    "allocated (y/n)": "N"
                },
                {
                    "address (bytes)": "184352",
                    "size (bytes)": "90112",
                    "allocated (y/n)": "Y"
                },
                {
                    "address (bytes)": "274464",
                    "size (bytes)": "1073465312",
                    "allocated (y/n)": "N"
                },
                {
                    "address (bytes)": "1073739776",
                    "size (bytes)": "2048",
                    "allocated (y/n)": "Y"
                }
            ],
            "total_allocatable (bytes) : per bank": "1073741792",
            "total_allocated (bytes): per bank": "272384",
            "total_free (bytes) : per bank": "1073469408",
            "largest_free_block (bytes) : per bank": "1073465312"
        },
        "l1": {
            "total_allocatable (bytes) : total_allocatable/bank * num_banks": "87504896",
            "total_allocated (bytes) : total_allocated/bank * num_banks": "0",
            "total_free (bytes) : total_allocatable - total_allocated": "87504896",
            "blocks": [
                {
                    "address (bytes)": "99104",
                    "size (bytes)": "1367264",
                    "allocated (y/n)": "N"
                }
            ],
            "total_allocatable (bytes) : per bank": "1367264",
            "total_allocated (bytes): per bank": "0",
            "total_free (bytes) : per bank": "1367264",
            "largest_free_block (bytes) : per bank": "1367264",
            "largest_contiguous_free_block (bytes) : per bank": "1367264"
        }
    },
```
  • Loading branch information
tapspatel authored Dec 19, 2024
1 parent 1b6d7d8 commit 7e5b01a
Show file tree
Hide file tree
Showing 11 changed files with 623 additions and 228 deletions.
3 changes: 3 additions & 0 deletions runtime/include/tt/runtime/detail/ttmetal.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "distributed/mesh_device.hpp"
#include "impl/buffers/circular_buffer.hpp"
#include "impl/event/event.hpp"
#include "tt_metal/detail/reports/memory_reporter.hpp"
#include "tt_metal/detail/tt_metal.hpp"
#include "tt_metal/host_api.hpp"

Expand Down Expand Up @@ -40,6 +41,8 @@ void closeDevice(Device device);

void deallocateBuffers(Device device);

void dumpMemoryReport(Device device);

void wait(Event event);

void wait(Tensor tensor);
Expand Down
3 changes: 3 additions & 0 deletions runtime/include/tt/runtime/detail/ttnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "distributed/mesh_device.hpp"
#include "host_api.hpp"
#include "hostdevcommon/common_values.hpp"
#include "tt_metal/detail/reports/memory_reporter.hpp"
#include "ttnn/device.hpp"
#include "ttnn/operations/ccl/all_gather/all_gather.hpp"
#include "ttnn/operations/conv/conv2d/conv2d.hpp"
Expand Down Expand Up @@ -90,6 +91,8 @@ void closeDevice(Device device);

void deallocateBuffers(Device device);

void dumpMemoryReport(Device device);

void wait(Event event);

void wait(Tensor tensor);
Expand Down
1 change: 1 addition & 0 deletions runtime/include/tt/runtime/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ std::pair<SystemDesc, DeviceIds> getCurrentSystemDesc();

namespace detail {
void deallocateBuffers(Device device);
void dumpMemoryReport(Device device);
} // namespace detail

DeviceRuntime getCurrentRuntime();
Expand Down
16 changes: 16 additions & 0 deletions runtime/lib/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,22 @@ void deallocateBuffers(Device device) {
#endif
LOG_FATAL("runtime is not enabled");
}

void dumpMemoryReport(Device device) {
#if defined(TT_RUNTIME_ENABLE_TTNN)
if (getCurrentRuntime() == DeviceRuntime::TTNN) {
return ::tt::runtime::ttnn::dumpMemoryReport(device);
}
#endif

#if defined(TT_RUNTIME_ENABLE_TTMETAL)
if (getCurrentRuntime() == DeviceRuntime::TTMetal) {
return ::tt::runtime::ttmetal::dumpMemoryReport(device);
}
#endif

LOG_FATAL("runtime is not enabled");
}
} // namespace detail

DeviceRuntime getCurrentRuntime() {
Expand Down
10 changes: 10 additions & 0 deletions runtime/lib/ttmetal/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,16 @@ void deallocateBuffers(Device deviceHandle) {
}
}

void dumpMemoryReport(Device deviceHandle) {
::tt::tt_metal::distributed::MeshDevice &meshDevice =
deviceHandle.as<::tt::tt_metal::distributed::MeshDevice>(
DeviceRuntime::TTMetal);

for (::tt::tt_metal::Device *device : meshDevice.get_devices()) {
::tt::tt_metal::detail::DumpDeviceMemoryState(device);
}
}

void wait(Event event) {
Events events = event.as<Events>(DeviceRuntime::TTMetal);
for (auto e : events) {
Expand Down
8 changes: 8 additions & 0 deletions runtime/lib/ttnn/runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,14 @@ void deallocateBuffers(Device deviceHandle) {
}
}

void dumpMemoryReport(Device deviceHandle) {
::ttnn::MeshDevice &meshDevice =
deviceHandle.as<::ttnn::MeshDevice>(DeviceRuntime::TTNN);
for (::ttnn::Device *device : meshDevice.get_devices()) {
::tt::tt_metal::detail::DumpDeviceMemoryState(device);
}
}

void wait(Event event) {
// Nothing to do for ttnn runtime
LOG_ASSERT(event.matchesRuntime(DeviceRuntime::TTNN));
Expand Down
Loading

0 comments on commit 7e5b01a

Please sign in to comment.