Skip to content

Commit

Permalink
Fix some namespace pollution caused by `using namespace tt::tt_metal`
Browse files: browse the repository at this point in the history
  • Loading branch information
blozano-tt authored Dec 18, 2024
1 parent 1cc0474 commit b4af6c8
Show file tree
Hide file tree
Showing 44 changed files with 188 additions and 167 deletions.
4 changes: 2 additions & 2 deletions tt_metal/impl/buffers/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ CoreType Buffer::core_type() const {
}

bool Buffer::is_l1() const {
return ::is_l1(buffer_type());
return tt::tt_metal::is_l1(buffer_type());
}
bool Buffer::is_dram() const {
return buffer_type() == BufferType::DRAM || buffer_type() == BufferType::TRACE;
Expand Down Expand Up @@ -544,7 +544,7 @@ tt_metal::ShardSpec from_json_t<tt_metal::ShardSpec>::operator()(const nlohmann:
const auto& shard_mode = from_json<tt_metal::ShardMode>(json_object.at("mode"));
const auto& physical_shard_shape = from_json<std::optional<std::array<uint32_t, 2>>>(json_object.at("physical_shard_shape"));
if (physical_shard_shape.has_value()) {
TT_FATAL(shard_mode == ShardMode::LOGICAL, "Physical shard shape can only be provided in logical sharding mode!");
TT_FATAL(shard_mode == tt::tt_metal::ShardMode::LOGICAL, "Physical shard shape can only be provided in logical sharding mode!");
return tt_metal::ShardSpec{
from_json<CoreRangeSet>(json_object.at("grid")),
from_json<std::array<uint32_t, 2>>(json_object.at("shape")),
Expand Down
5 changes: 4 additions & 1 deletion tt_metal/impl/dispatch/debug_tools.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ void match_device_program_data_with_host_program_data(const char* host_file, con

// Dumps host-side CQ data into files.
void dump_cqs(
std::ofstream& cq_file, std::ofstream& iq_file, SystemMemoryManager& sysmem_manager, bool dump_raw_data = false);
std::ofstream& cq_file,
std::ofstream& iq_file,
tt::tt_metal::SystemMemoryManager& sysmem_manager,
bool dump_raw_data = false);

} // end namespace internal
10 changes: 5 additions & 5 deletions ttnn/cpp/ttnn/decorators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ inline auto create_async_output_tensors(
Tensors output_tensors;
output_tensors.reserve(std::tuple_size_v<execute_on_worker_thread_return_t>);
for (auto index = 0; index < std::tuple_size_v<execute_on_worker_thread_return_t>; index++) {
output_tensors.emplace_back(
Tensor(operation::get_workers_for_op_output(inputs, optional_inputs, enable_autoformat_device)));
output_tensors.emplace_back(Tensor(
tt::tt_metal::operation::get_workers_for_op_output(inputs, optional_inputs, enable_autoformat_device)));
}
return output_tensors;
} else {
Expand Down Expand Up @@ -265,7 +265,7 @@ struct registered_operation_t {
detail::extract_args_to_vector<std::optional<ttnn::Tensor>>(std::forward<args_t>(args)...);

bool enable_autoformat = false;
operation::launch_op(
tt::tt_metal::operation::launch_op(
[args...](
const Tensors& input_tensors,
const OptionalConstTensors& optional_input_tensors,
Expand Down Expand Up @@ -311,7 +311,7 @@ struct registered_operation_t {
template <typename... args_t>
auto operator()(args_t&&... args) const {
tt::log_debug(tt::LogOp, "Started C++ ttnn operation: {}", std::string_view{cpp_fully_qualified_name});
GraphTracker::instance().track_function_start(cpp_fully_qualified_name, args...);
tt::tt_metal::GraphTracker::instance().track_function_start(cpp_fully_qualified_name, args...);
auto output = invoke(std::forward<args_t>(args)...);

// Should every output tensor be tracked?
Expand All @@ -321,7 +321,7 @@ struct registered_operation_t {
}
*/

GraphTracker::instance().track_function_end(output);
tt::tt_metal::GraphTracker::instance().track_function_end(output);
tt::log_debug(tt::LogOp, "Finished C++ ttnn operation: {}", std::string_view{cpp_fully_qualified_name});
return output;
}
Expand Down
13 changes: 9 additions & 4 deletions ttnn/cpp/ttnn/distributed/distributed_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ class ReplicateTensorToMesh : public TensorToMesh {
return tensors;
}

DistributedTensorConfig config() const override { return DistributedTensorConfig{ReplicateTensor{num_devices_}}; }
tt::tt_metal::DistributedTensorConfig config() const override {
return tt::tt_metal::DistributedTensorConfig{ReplicateTensor{num_devices_}};
}

private:
size_t num_devices_ = 0;
Expand All @@ -39,7 +41,9 @@ class ShardTensorToMesh : public TensorToMesh {
return experimental::xtensor::chunk(tensor, num_devices_, shard_dim_);
}

DistributedTensorConfig config() const override { return DistributedTensorConfig{ShardTensor{shard_dim_}}; }
tt::tt_metal::DistributedTensorConfig config() const override {
return tt::tt_metal::DistributedTensorConfig{ShardTensor{shard_dim_}};
}

private:
size_t num_devices_ = 0;
Expand Down Expand Up @@ -94,7 +98,7 @@ class ShardTensorTo2dMesh : public TensorToMesh {
return tensor_shards;
}

DistributedTensorConfig config() const override {
tt::tt_metal::DistributedTensorConfig config() const override {
return DistributedTensorConfig{ShardTensor2D{ShardMesh{mesh_shape_.num_rows, mesh_shape_.num_cols}}};
}

Expand Down Expand Up @@ -178,7 +182,8 @@ std::unique_ptr<MeshToTensor> concat_2d_mesh_to_tensor_composer(MeshDevice& mesh

Tensor distribute_tensor(const Tensor& tensor, MeshDevice& mesh_device, TensorToMesh& mapper) {
TT_FATAL(
tensor.storage_type() != StorageType::MULTI_DEVICE && tensor.storage_type() != StorageType::MULTI_DEVICE_HOST,
tensor.storage_type() != tt::tt_metal::StorageType::MULTI_DEVICE &&
tensor.storage_type() != tt::tt_metal::StorageType::MULTI_DEVICE_HOST,
"TensorToMesh does not support multi-device or multi-device host tensors; got storage type: {}",
tensor.storage_type());
std::vector<Tensor> tensors = mapper.map(tensor);
Expand Down
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/distributed/distributed_tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class TensorToMesh {
public:
virtual ~TensorToMesh() = default;
virtual std::vector<Tensor> map(const Tensor& tensor) = 0;
virtual DistributedTensorConfig config() const = 0;
virtual tt::tt_metal::DistributedTensorConfig config() const = 0;
};

// Composer interface that aggregates a multi-device tensor into a host tensor.
Expand Down
7 changes: 5 additions & 2 deletions ttnn/cpp/ttnn/events.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ struct MultiDeviceEvent {
// Single Device APIs
std::shared_ptr<Event> create_event(Device* device);
void record_event(
uint8_t cq_id, const std::shared_ptr<Event>& event, const std::vector<SubDeviceId>& sub_device_ids = {});
uint8_t cq_id,
const std::shared_ptr<Event>& event,
const std::vector<tt::tt_metal::SubDeviceId>& sub_device_ids = {});
void wait_for_event(uint8_t cq_id, const std::shared_ptr<Event>& event);
// Multi Device APIs
MultiDeviceEvent create_event(MeshDevice* mesh_device);
void record_event(uint8_t cq_id, const MultiDeviceEvent& event, const std::vector<SubDeviceId>& sub_device_ids = {});
void record_event(
uint8_t cq_id, const MultiDeviceEvent& event, const std::vector<tt::tt_metal::SubDeviceId>& sub_device_ids = {});
void wait_for_event(uint8_t cq_id, const MultiDeviceEvent& event);

} // namespace ttnn::events
8 changes: 4 additions & 4 deletions ttnn/cpp/ttnn/global_semaphore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ std::shared_ptr<GlobalSemaphore> create_global_semaphore(
return global_semaphore;
}

DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore) {
tt::tt_metal::DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore) {
auto* device = global_semaphore->device();
DeviceAddr address = 0;
tt::tt_metal::DeviceAddr address = 0;
device->push_work([&global_semaphore, &address] { address = global_semaphore->address(); }, /*blocking=*/true);
return address;
}
Expand Down Expand Up @@ -70,8 +70,8 @@ MultiDeviceGlobalSemaphore create_global_semaphore(
}
return multi_device_global_semaphore;
}
std::vector<DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore) {
std::vector<DeviceAddr> addresses(global_semaphore.global_semaphores.size());
std::vector<tt::tt_metal::DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore) {
std::vector<tt::tt_metal::DeviceAddr> addresses(global_semaphore.global_semaphores.size());
const auto& global_semaphores = global_semaphore.global_semaphores;
for (uint32_t i = 0; i < global_semaphores.size(); ++i) {
const auto& global_semaphore = global_semaphores[i];
Expand Down
4 changes: 2 additions & 2 deletions ttnn/cpp/ttnn/global_semaphore.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ std::shared_ptr<GlobalSemaphore> create_global_semaphore(
uint32_t initial_value,
BufferType buffer_type = BufferType::L1,
tt::stl::Span<const SubDeviceId> sub_device_ids = {});
DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore);
tt::tt_metal::DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore);
void reset_global_semaphore_value(
const std::shared_ptr<GlobalSemaphore>& global_semaphore,
uint32_t reset_value,
Expand All @@ -35,7 +35,7 @@ MultiDeviceGlobalSemaphore create_global_semaphore(
uint32_t initial_value,
BufferType buffer_type = BufferType::L1,
tt::stl::Span<const SubDeviceId> sub_device_ids = {});
std::vector<DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore);
std::vector<tt::tt_metal::DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore);
void reset_global_semaphore_value(
const MultiDeviceGlobalSemaphore& global_semaphore,
uint32_t reset_value,
Expand Down
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ Tensor optimized_conv_new(const Tensor& a, const Tensor &b, std::optional<const
bool enable_subblock_padding,
bool use_non_tile_height
) {
std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({a, b}))};
std::vector<Tensor> output_tensors = {Tensor(tt::tt_metal::operation::get_workers_for_op_output({a, b}))};
operation::launch_op(
[sliding_window_config, output_channels, groups, untilize_out, fuse_relu, parallelization_config, block_config, memory_config, dtype, input_tensor_shape, use_shallow_conv_variant, compute_kernel_config, enable_act_double_buffer, enable_weights_double_buffer, enable_split_reader, enable_subblock_padding, use_non_tile_height]
(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
Expand Down
20 changes: 10 additions & 10 deletions ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ struct OptimizedConvBlockConfig {
uint32_t out_subblock_w_ntiles;
};

operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_new(const Tensor& a, const Tensor &b, const std::optional<const Tensor>& bias,
tt::tt_metal::operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_new(const Tensor& a, const Tensor &b, const std::optional<const Tensor>& bias,
const sliding_window::SlidingWindowConfig& sliding_window_config,
uint32_t output_channels,
uint32_t groups,
bool untilize_out, bool fuse_relu,
const OptimizedConvParallelizationConfig& parallelization_config,
const OptimizedConvBlockConfig& block_config,
DataType dtype,
tt::tt_metal::DataType dtype,
std::array<std::uint32_t, 4> input_tensor_shape,
bool use_shallow_conv_variant,
std::optional<const DeviceComputeKernelConfig> compute_kernel_config,
Expand All @@ -69,8 +69,8 @@ struct OptimizedConvNew {
const uint32_t output_channels;
const uint32_t groups;
bool untilize_out, has_bias, fuse_relu;
MemoryConfig memory_config;
const DataType dtype;
tt::tt_metal::MemoryConfig memory_config;
const tt::tt_metal::DataType dtype;
std::array<std::uint32_t, 4> input_tensor_shape; // For sharded input, input tensor shape is nonsense
bool use_shallow_conv_variant;
const DeviceComputeKernelConfig compute_kernel_config;
Expand All @@ -85,8 +85,8 @@ struct OptimizedConvNew {
bool has_bias, bool fuse_relu,
const OptimizedConvParallelizationConfig& p_config,
const OptimizedConvBlockConfig& b_config,
MemoryConfig memory_config,
DataType dtype,
tt::tt_metal::MemoryConfig memory_config,
tt::tt_metal::DataType dtype,
std::array<std::uint32_t, 4> input_tensor_shape, bool use_shallow_conv_variant,
const DeviceComputeKernelConfig compute_kernel_config, bool enable_act_double_buffer, bool enable_weights_double_buffer, bool enable_split_reader, bool enable_subblock_padding, bool use_non_tile_height) :
output_channels(output_channels),
Expand All @@ -109,9 +109,9 @@ struct OptimizedConvNew {

void validate(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors) const;
std::vector<TensorSpec> compute_output_specs(const std::vector<Tensor>& input_tensors) const;
operation::ProgramWithCallbacks create_program(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, std::vector<Tensor> &output_tensors) const;
tt::tt_metal::operation::ProgramWithCallbacks create_program(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, std::vector<Tensor> &output_tensors) const;

operation::OpPerformanceModel create_op_performance_model(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<Tensor> &output_tensors) const;
tt::tt_metal::operation::OpPerformanceModel create_op_performance_model(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<Tensor> &output_tensors) const;

static constexpr auto attribute_names = std::make_tuple(
"parallelization_config",
Expand Down Expand Up @@ -154,8 +154,8 @@ Tensor optimized_conv_new(const Tensor& a, const Tensor &b, std::optional<const
bool untilize_out, bool fuse_relu,
const OptimizedConvParallelizationConfig& parallelization_config,
const OptimizedConvBlockConfig& block_config,
const MemoryConfig& memory_config,
DataType dtype,
const tt::tt_metal::MemoryConfig& memory_config,
tt::tt_metal::DataType dtype,
std::array<std::uint32_t, 4> input_tensor_shape,
bool use_shallow_conv_variant,
const DeviceComputeKernelConfig& compute_kernel_config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const uint32_t untilized_padded_out_cb = CBIndex::c_28;
} // namespace CMAKE_UNIQUE_NAMESPACE
} // namespace

operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
tt::tt_metal::operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
tt_metal::Program& program,
const Tensor& a,
const Tensor& b,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace conv2d {

using namespace tt;

operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
tt::tt_metal::operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
tt_metal::Program& program,
const Tensor& a,
const Tensor& b,
Expand Down
Loading

0 comments on commit b4af6c8

Please sign in to comment.