Skip to content

Commit

Permalink
#0: Move global CBs to v1 namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-aho committed Dec 3, 2024
1 parent 1409f17 commit 613aff0
Show file tree
Hide file tree
Showing 15 changed files with 152 additions and 142 deletions.
1 change: 1 addition & 0 deletions Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,7 @@ INPUT = tt_metal/hw/inc/dataflow_api.h \
tt_metal/include/tt_metal/command_queue.hpp \
tt_metal/include/tt_metal/device.hpp \
tt_metal/include/tt_metal/event.hpp \
tt_metal/include/tt_metal/global_circular_buffer.hpp \
tt_metal/include/tt_metal/kernel.hpp \
tt_metal/include/tt_metal/program.hpp \
tt_metal/include/tt_metal/trace.hpp
Expand Down
20 changes: 11 additions & 9 deletions tests/tt_metal/tt_metal/api/test_global_circular_buffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "tt_metal/detail/tt_metal.hpp"
#include "tt_metal/host_api.hpp"
#include "tt_metal/impl/buffers/global_circular_buffer.hpp"
#include "tt_metal/include/tt_metal/global_circular_buffer.hpp"
#include "tt_metal/include/tt_metal/program.hpp"

TEST_F(DispatchFixture, TensixCreateGlobalCircularBuffers) {
CoreRangeSet cores(CoreRange({1, 1}, {1, 1}));
Expand All @@ -21,7 +23,7 @@ TEST_F(DispatchFixture, TensixCreateGlobalCircularBuffers) {
{
std::unordered_map<CoreCoord, CoreRangeSet> sender_receiver_core_mapping;
sender_receiver_core_mapping[CoreCoord(0, 0)] = cores;
auto global_cb = tt::tt_metal::experimental::CreateGlobalCircularBuffer(
auto global_cb = tt::tt_metal::v1::experimental::CreateGlobalCircularBuffer(
device, sender_receiver_core_mapping, 3200, tt::tt_metal::BufferType::L1);
auto buffer_address = global_cb->buffer_address();
auto config_address = global_cb->config_address();
Expand All @@ -32,7 +34,7 @@ TEST_F(DispatchFixture, TensixCreateGlobalCircularBuffers) {
sender_receiver_core_mapping[CoreCoord(1, 1)] = cores3;
// sender receiver cores overlap
EXPECT_THROW(
tt::tt_metal::experimental::CreateGlobalCircularBuffer(
tt::tt_metal::v1::experimental::CreateGlobalCircularBuffer(
device, sender_receiver_core_mapping, 3200, tt::tt_metal::BufferType::L1),
std::exception);
}
Expand All @@ -42,7 +44,7 @@ TEST_F(DispatchFixture, TensixCreateGlobalCircularBuffers) {
sender_receiver_core_mapping[CoreCoord(0, 1)] = cores2;
// receiver cores overlap
EXPECT_THROW(
tt::tt_metal::experimental::CreateGlobalCircularBuffer(
tt::tt_metal::v1::experimental::CreateGlobalCircularBuffer(
device, sender_receiver_core_mapping, 3200, tt::tt_metal::BufferType::L1),
std::exception);
}
Expand All @@ -60,11 +62,11 @@ TEST_F(DispatchFixture, TensixProgramGlobalCircularBuffers) {
auto device = devices_[0];
std::unordered_map<CoreCoord, CoreRangeSet> sender_receiver_core_mapping;
sender_receiver_core_mapping[CoreCoord(0, 0)] = receiver_cores;
auto global_cb = tt::tt_metal::experimental::CreateGlobalCircularBuffer(
auto global_cb = tt::tt_metal::v1::experimental::CreateGlobalCircularBuffer(
device, sender_receiver_core_mapping, 3200, tt::tt_metal::BufferType::L1);
std::unordered_map<CoreCoord, CoreRangeSet> dummy_sender_receiver_core_mapping;
dummy_sender_receiver_core_mapping[CoreCoord(0, 0)] = dummy_receiver_cores;
auto dummy_global_cb = tt::tt_metal::experimental::CreateGlobalCircularBuffer(
auto dummy_global_cb = tt::tt_metal::v1::experimental::CreateGlobalCircularBuffer(
device, dummy_sender_receiver_core_mapping, 3200, tt::tt_metal::BufferType::L1);
{
tt::tt_metal::Program program = CreateProgram();
Expand All @@ -81,14 +83,14 @@ TEST_F(DispatchFixture, TensixProgramGlobalCircularBuffers) {
global_cb_config.index(local_cb_index).set_page_size(cb_page_size).set_data_format(tile_format);
EXPECT_THROW(global_cb_config.remote_index(2), std::exception);
EXPECT_THROW(
tt::tt_metal::experimental::CreateCircularBuffer(
tt::tt_metal::v1::experimental::CreateCircularBuffer(
program, CoreRangeSet(CoreRange({3, 3})), global_cb_config, *global_cb),
std::exception);
auto remote_cb =
tt::tt_metal::experimental::CreateCircularBuffer(program, receiver_cores, global_cb_config, *global_cb);
tt::tt_metal::v1::experimental::CreateCircularBuffer(program, receiver_cores, global_cb_config, *global_cb);
tt::tt_metal::detail::CompileProgram(device, program);
program.finalize(device);
UpdateDynamicCircularBufferAddress(program, remote_cb, *global_cb);
tt::tt_metal::v1::experimental::UpdateDynamicCircularBufferAddress(program, remote_cb, *global_cb);
EXPECT_THROW(UpdateDynamicCircularBufferAddress(program, remote_cb, *dummy_global_cb), std::exception);
}
{
Expand All @@ -105,7 +107,7 @@ TEST_F(DispatchFixture, TensixProgramGlobalCircularBuffers) {
global_cb_config.remote_index(remote_cb_index).set_page_size(cb_page_size).set_data_format(tile_format);
global_cb_config.index(local_cb_index).set_page_size(cb_page_size).set_data_format(tile_format);
auto remote_cb =
tt::tt_metal::experimental::CreateCircularBuffer(program, receiver_cores, global_cb_config, *global_cb);
tt::tt_metal::v1::experimental::CreateCircularBuffer(program, receiver_cores, global_cb_config, *global_cb);
tt::tt_metal::detail::CompileProgram(device, program);
EXPECT_THROW(program.finalize(device), std::exception);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "tt_metal/host_api.hpp"
#include "tt_metal/impl/buffers/global_circular_buffer.hpp"
#include "tt_metal/impl/buffers/global_semaphore.hpp"
#include "tt_metal/include/tt_metal/global_circular_buffer.hpp"
#include "tt_metal/include/tt_metal/program.hpp"
#include "tt_metal/tt_metal/perf_microbenchmark/common/util.hpp"
#include "tt_metal/common/work_split.hpp"
#include "tests/tt_metal/test_utils/tilization.hpp"
Expand Down Expand Up @@ -78,7 +80,7 @@ void get_max_page_size_and_num_pages(
num_pages = total_size / page_size;
}

std::tuple<std::vector<tt_metal::Program>,std::shared_ptr<tt_metal::experimental::GlobalCircularBuffer>>
std::tuple<std::vector<tt_metal::Program>,std::shared_ptr<tt_metal::v1::experimental::GlobalCircularBuffer>>
create_programs(
tt_metal::Device* device,
const CoreRangeSet& dram_reader_core,
Expand Down Expand Up @@ -139,12 +141,12 @@ create_programs(
.set_page_size(reader_cb_index, single_tile_size);
auto reader_cb = tt_metal::CreateCircularBuffer(sender_program, dram_reader_core, reader_cb_config);

auto global_cb = tt_metal::experimental::CreateGlobalCircularBuffer(
auto global_cb = tt_metal::v1::experimental::CreateGlobalCircularBuffer(
device, sender_receiver_core_mapping, padded_global_cb_size, tt_metal::BufferType::L1);
tt_metal::CircularBufferConfig writer_cb_config = tt_metal::CircularBufferConfig(receiver_cb_size);
writer_cb_config.remote_index(writer_cb_index).set_page_size(single_tile_size).set_data_format(tile_format);
auto writer_cb =
tt_metal::experimental::CreateCircularBuffer(sender_program, dram_reader_core, writer_cb_config, *global_cb);
tt_metal::v1::experimental::CreateCircularBuffer(sender_program, dram_reader_core, writer_cb_config, *global_cb);

// mixed cb dataformat
uint32_t next_layer_num_blocks = num_blocks * 2;
Expand Down Expand Up @@ -175,7 +177,7 @@ create_programs(
uint32_t receiver_page_size = 32;
tt_metal::CircularBufferConfig receiver_cb_config = tt_metal::CircularBufferConfig(receiver_cb_size);
receiver_cb_config.remote_index(receiver_cb_index).set_page_size(single_tile_size).set_data_format(tile_format);
auto receiver_cb = tt_metal::experimental::CreateCircularBuffer(
auto receiver_cb = tt_metal::v1::experimental::CreateCircularBuffer(
receiver_program, l1_receiver_cores, receiver_cb_config, *global_cb);

log_info("reader_cb_size: {}", reader_cb_size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "tt_metal/host_api.hpp"
#include "tt_metal/impl/buffers/global_circular_buffer.hpp"
#include "tt_metal/impl/buffers/global_semaphore.hpp"
#include "tt_metal/include/tt_metal/global_circular_buffer.hpp"
#include "tt_metal/include/tt_metal/program.hpp"
#include "tt_metal/tt_metal/perf_microbenchmark/common/util.hpp"
#include "tt_metal/common/work_split.hpp"
#include "tests/tt_metal/test_utils/tilization.hpp"
Expand Down Expand Up @@ -93,7 +95,7 @@ std::tuple<uint32_t, uint32_t> get_out_subblock_params(
return {1, 1};
}

std::tuple<std::vector<tt_metal::Program>, std::shared_ptr<tt::tt_metal::experimental::GlobalCircularBuffer>>
std::tuple<std::vector<tt_metal::Program>, std::shared_ptr<tt::tt_metal::v1::experimental::GlobalCircularBuffer>>
create_programs(
tt_metal::Device* device,
const CoreRangeSet& dram_reader_core,
Expand Down Expand Up @@ -160,13 +162,13 @@ create_programs(
uint32_t in1_receiver_cb_size = in1_block_h * in1_block_w * single_tile_size * cb_num_blocks / num_receivers;
uint32_t padded_global_cb_size = in1_receiver_cb_size + cb_padding;

auto global_cb = tt_metal::experimental::CreateGlobalCircularBuffer(
auto global_cb = tt_metal::v1::experimental::CreateGlobalCircularBuffer(
device, sender_receiver_core_mapping, padded_global_cb_size, tt_metal::BufferType::L1);

uint32_t in1_writer_cb_index = 31;
tt_metal::CircularBufferConfig in1_writer_cb_config = tt_metal::CircularBufferConfig(in1_receiver_cb_size);
in1_writer_cb_config.remote_index(in1_writer_cb_index).set_page_size(single_tile_size).set_data_format(tile_format);
auto writer_cb = tt_metal::experimental::CreateCircularBuffer(
auto writer_cb = tt_metal::v1::experimental::CreateCircularBuffer(
sender_program, dram_reader_core, in1_writer_cb_config, *global_cb);

// in0 reader CB
Expand All @@ -187,7 +189,7 @@ create_programs(
.set_page_size(single_tile_size)
.set_data_format(tile_format);
in1_receiver_cb_config.index(in1_pusher_cb_index).set_page_size(single_tile_size).set_data_format(tile_format);
auto in1_receiver_cb = tt_metal::experimental::CreateCircularBuffer(
auto in1_receiver_cb = tt_metal::v1::experimental::CreateCircularBuffer(
receiver_program, l1_receiver_cores, in1_receiver_cb_config, *global_cb);

// output CB
Expand Down
75 changes: 0 additions & 75 deletions tt_metal/host_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,6 @@ class Event;
class Buffer;
class GlobalSemaphore;

namespace experimental {

class GlobalCircularBuffer;

} // namespace experimental

// ==================================================
// HOST API: Device management
// ==================================================
Expand Down Expand Up @@ -264,50 +258,6 @@ void UpdateCircularBufferPageSize(Program& program, CBHandle cb_handle, uint8_t
// clang-format on
void UpdateDynamicCircularBufferAddress(Program& program, CBHandle cb_handle, const Buffer& buffer);

namespace experimental {

// clang-format off
/**
* Creates a Circular Buffer (CB) in L1 memory of all cores within core ranges (inclusive) and adds it to the program. There can be a total of NUM_CIRCULAR_BUFFERS (32) circular buffers per core.
* Circular buffers hold data and have an associated config which indicates usage of the address space.
* If the config is specified for multiple buffer indices, the circular buffer address space is shared and each buffer index can potentially have a unique view of the shared space.
*
* This will create a dynamic CB that uses the address space of the GlobalCircularBuffer specified, and will set up any remote CB IDs specified in the config with the GlobalCircularBuffer's config.
*
* Return value: Circular Buffer ID (uintptr_t)
*
* | Argument | Description | Type | Valid Range | Required |
* |------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------|-------------|----------|
* | program | The program to which buffer will be added to | Program & | | Yes |
* | core_spec | Either a single logical core, a range of logical cores or a set of logical core ranges that indicate where the circular buffer will be configured | const std::variant<CoreCoord, CoreRange, CoreRangeSet> & | | Yes |
* | config | Config for circular buffer | const CircularBufferConfig & | | Yes |
* | global_circular_buffer | GlobalCircularBuffer to use the address space and configuration of for setting up remote CBs | const GlobalCircularBuffer & | | Yes |
*/
// clang-format on
CBHandle CreateCircularBuffer(
Program& program,
const std::variant<CoreCoord, CoreRange, CoreRangeSet>& core_spec,
const CircularBufferConfig& config,
const GlobalCircularBuffer& global_circular_buffer);

// clang-format off
/**
* Update the address of a dynamic circular buffer that was configured with a GlobalCircularBuffer.
*
* Return value: void
*
* | Argument | Description | Type | Valid Range | Required |
* |------------------------|----------------------------------------------------------------------------------------------------|------------------------------|-------------|----------|
* | program | The program containing the circular buffer | Program & | | Yes |
* | cb_handle | ID of the circular buffer, returned by `CreateCircularBuffers` | CBHandle (uintptr_t) | | Yes | |
* | global_circular_buffer | GlobalCircularBuffer to use the address space and configuration of for circular buffer `cb_handle` | const GlobalCircularBuffer & | | Yes |
*/
// clang-format on
void UpdateDynamicCircularBufferAddress(
Program& program, CBHandle cb_handle, const GlobalCircularBuffer& global_circular_buffer);

} // namespace experimental

// clang-format off
/**
* Update the address and total size of a dynamic circular buffer. Dynamic circular buffers share the same address space as L1 buffers.
Expand Down Expand Up @@ -378,31 +328,6 @@ std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
std::unique_ptr<GlobalSemaphore> CreateGlobalSemaphore(
Device* device, CoreRangeSet&& cores, uint32_t initial_value, BufferType buffer_type = BufferType::L1);

namespace experimental {

// clang-format off
/**
* Creates a global circular buffer on the specified sender and receiver cores with the given size.
* sender_receiver_core_mapping specifies which sender cores will communicate with which receiver cores.
*
* Return value: std::shared_ptr<GlobalCircularBuffer>
*
* | Argument | Description | Type | Valid Range | Required |
* |------------------------------|------------------------------------------------------------------|-----------------------------------------------------------|--------------|----------|
* | device | The device to create the circular buffer on | Device * | | Yes |
* | sender_receiver_core_mapping | Mapping of sender to receiver cores used for the circular buffer | const std::unordered_map<CoreCoord, CoreRangeSet> & | | Yes |
* | size | Circular Buffer size | uint32_t | | Yes |
* | buffer_type | Buffer type to store the global circular buffer | BufferType | L1 types | No |
*/
// clang-format on
std::shared_ptr<GlobalCircularBuffer> CreateGlobalCircularBuffer(
Device* device,
const std::unordered_map<CoreCoord, CoreRangeSet>& sender_receiver_core_mapping,
uint32_t size,
BufferType buffer_type = BufferType::L1);

} // namespace experimental

// clang-format off
/**
* Creates a pre-allocated interleaved DRAM or L1 buffer with the global allocator on device
Expand Down
4 changes: 2 additions & 2 deletions tt_metal/impl/buffers/circular_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ CircularBuffer::CircularBuffer(const CoreRangeSet& core_ranges, const CircularBu
CircularBuffer::CircularBuffer(
const CoreRangeSet& core_ranges,
const CircularBufferConfig& config,
const experimental::GlobalCircularBuffer& global_circular_buffer) :
const v1::experimental::GlobalCircularBuffer& global_circular_buffer) :
id_(reinterpret_cast<uintptr_t>(this)),
core_ranges_(core_ranges),
config_(config),
Expand Down Expand Up @@ -133,7 +133,7 @@ void CircularBuffer::assign_global_address() {
GetBufferAddress(config_.shadow_global_buffer, &globally_allocated_address_);
}

void CircularBuffer::set_global_circular_buffer(const experimental::GlobalCircularBuffer& global_circular_buffer) {
void CircularBuffer::set_global_circular_buffer(const v1::experimental::GlobalCircularBuffer& global_circular_buffer) {
TT_FATAL(
global_circular_buffer.all_cores().contains(this->core_ranges_),
"Specified cores are not contained in associated GlobalCircularBuffer");
Expand Down
12 changes: 7 additions & 5 deletions tt_metal/impl/buffers/circular_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@
#include "tt_metal/impl/buffers/circular_buffer_types.hpp"

namespace tt::tt_metal {
inline namespace v0 {

namespace v1 {
namespace experimental {
class GlobalCircularBuffer;
}
} // namespace experimental
} // namespace v1
inline namespace v0 {

class CircularBuffer {
public:
CircularBuffer(const CoreRangeSet& core_range_set, const CircularBufferConfig& config);
CircularBuffer(
const CoreRangeSet& core_ranges,
const CircularBufferConfig& config,
const experimental::GlobalCircularBuffer& global_circular_buffer);
const v1::experimental::GlobalCircularBuffer& global_circular_buffer);

const CBHandle id() const { return id_; }

Expand Down Expand Up @@ -58,7 +60,7 @@ class CircularBuffer {

void set_locally_allocated_address(uint32_t address) { this->locally_allocated_address_ = address; }

void set_global_circular_buffer(const experimental::GlobalCircularBuffer& global_circular_buffer);
void set_global_circular_buffer(const v1::experimental::GlobalCircularBuffer& global_circular_buffer);

DeviceAddr config_address() const;

Expand All @@ -77,7 +79,7 @@ class CircularBuffer {
std::optional<uint32_t> locally_allocated_address_;
uint32_t globally_allocated_address_;
DeviceAddr global_circular_buffer_config_address_;
const experimental::GlobalCircularBuffer* shadow_global_circular_buffer_ = nullptr;
const v1::experimental::GlobalCircularBuffer* shadow_global_circular_buffer_ = nullptr;
// add a callback to invalidate circular buffer allocation
};

Expand Down
4 changes: 2 additions & 2 deletions tt_metal/impl/buffers/global_circular_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

namespace tt::tt_metal {

inline namespace v0 {
namespace v1 {

namespace experimental {

Expand Down Expand Up @@ -156,6 +156,6 @@ uint32_t GlobalCircularBuffer::size() const { return this->size_; }

} // namespace experimental

} // namespace v0
} // namespace v1

} // namespace tt::tt_metal
6 changes: 5 additions & 1 deletion tt_metal/impl/buffers/global_circular_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ inline namespace v0 {
class Buffer;
class Device;

} // namespace v0

namespace v1 {

namespace experimental {

class GlobalCircularBuffer {
Expand Down Expand Up @@ -69,6 +73,6 @@ class GlobalCircularBuffer {

} // namespace experimental

} // namespace v0
} // namespace v1

} // namespace tt::tt_metal
Loading

0 comments on commit 613aff0

Please sign in to comment.