Fix some namespace pollution caused by using namespace tt::tt_metal (#16090)
tenstorrent · Dec 18, 2024 · b4af6c8 · b4af6c8
1 parent 1cc0474
commit b4af6c8
Show file tree

Hide file tree

Showing 44 changed files with 188 additions and 167 deletions.
diff --git a/tt_metal/impl/buffers/buffer.cpp b/tt_metal/impl/buffers/buffer.cpp
@@ -405,7 +405,7 @@ CoreType Buffer::core_type() const {
 }
 
 bool Buffer::is_l1() const {
-    return ::is_l1(buffer_type());
+    return tt::tt_metal::is_l1(buffer_type());
 }
 bool Buffer::is_dram() const {
     return buffer_type() == BufferType::DRAM || buffer_type() == BufferType::TRACE;
@@ -544,7 +544,7 @@ tt_metal::ShardSpec from_json_t<tt_metal::ShardSpec>::operator()(const nlohmann:
     const auto& shard_mode = from_json<tt_metal::ShardMode>(json_object.at("mode"));
     const auto& physical_shard_shape = from_json<std::optional<std::array<uint32_t, 2>>>(json_object.at("physical_shard_shape"));
     if (physical_shard_shape.has_value()) {
-        TT_FATAL(shard_mode == ShardMode::LOGICAL, "Physical shard shape can only be provided in logical sharding mode!");
+        TT_FATAL(shard_mode == tt::tt_metal::ShardMode::LOGICAL, "Physical shard shape can only be provided in logical sharding mode!");
         return tt_metal::ShardSpec{
             from_json<CoreRangeSet>(json_object.at("grid")),
             from_json<std::array<uint32_t, 2>>(json_object.at("shape")),

diff --git a/tt_metal/impl/dispatch/debug_tools.hpp b/tt_metal/impl/dispatch/debug_tools.hpp
@@ -17,6 +17,9 @@ void match_device_program_data_with_host_program_data(const char* host_file, con
 
 // Dumps host-side CQ data into files.
 void dump_cqs(
-    std::ofstream& cq_file, std::ofstream& iq_file, SystemMemoryManager& sysmem_manager, bool dump_raw_data = false);
+    std::ofstream& cq_file,
+    std::ofstream& iq_file,
+    tt::tt_metal::SystemMemoryManager& sysmem_manager,
+    bool dump_raw_data = false);
 
 }  // end namespace internal
diff --git a/ttnn/cpp/ttnn/decorators.hpp b/ttnn/cpp/ttnn/decorators.hpp
@@ -79,8 +79,8 @@ inline auto create_async_output_tensors(
         Tensors output_tensors;
         output_tensors.reserve(std::tuple_size_v<execute_on_worker_thread_return_t>);
         for (auto index = 0; index < std::tuple_size_v<execute_on_worker_thread_return_t>; index++) {
-            output_tensors.emplace_back(
-                Tensor(operation::get_workers_for_op_output(inputs, optional_inputs, enable_autoformat_device)));
+            output_tensors.emplace_back(Tensor(
+                tt::tt_metal::operation::get_workers_for_op_output(inputs, optional_inputs, enable_autoformat_device)));
         }
         return output_tensors;
     } else {
@@ -265,7 +265,7 @@ struct registered_operation_t {
             detail::extract_args_to_vector<std::optional<ttnn::Tensor>>(std::forward<args_t>(args)...);
 
         bool enable_autoformat = false;
-        operation::launch_op(
+        tt::tt_metal::operation::launch_op(
             [args...](
                 const Tensors& input_tensors,
                 const OptionalConstTensors& optional_input_tensors,
@@ -311,7 +311,7 @@ struct registered_operation_t {
     template <typename... args_t>
     auto operator()(args_t&&... args) const {
         tt::log_debug(tt::LogOp, "Started   C++ ttnn operation: {}", std::string_view{cpp_fully_qualified_name});
-        GraphTracker::instance().track_function_start(cpp_fully_qualified_name, args...);
+        tt::tt_metal::GraphTracker::instance().track_function_start(cpp_fully_qualified_name, args...);
         auto output = invoke(std::forward<args_t>(args)...);
 
         // Should every output tensor be tracked?
@@ -321,7 +321,7 @@ struct registered_operation_t {
         }
         */
 
-        GraphTracker::instance().track_function_end(output);
+        tt::tt_metal::GraphTracker::instance().track_function_end(output);
         tt::log_debug(tt::LogOp, "Finished  C++ ttnn operation: {}", std::string_view{cpp_fully_qualified_name});
         return output;
     }

diff --git a/ttnn/cpp/ttnn/distributed/distributed_tensor.cpp b/ttnn/cpp/ttnn/distributed/distributed_tensor.cpp
@@ -25,7 +25,9 @@ class ReplicateTensorToMesh : public TensorToMesh {
         return tensors;
     }
 
-    DistributedTensorConfig config() const override { return DistributedTensorConfig{ReplicateTensor{num_devices_}}; }
+    tt::tt_metal::DistributedTensorConfig config() const override {
+        return tt::tt_metal::DistributedTensorConfig{ReplicateTensor{num_devices_}};
+    }
 
 private:
     size_t num_devices_ = 0;
@@ -39,7 +41,9 @@ class ShardTensorToMesh : public TensorToMesh {
         return experimental::xtensor::chunk(tensor, num_devices_, shard_dim_);
     }
 
-    DistributedTensorConfig config() const override { return DistributedTensorConfig{ShardTensor{shard_dim_}}; }
+    tt::tt_metal::DistributedTensorConfig config() const override {
+        return tt::tt_metal::DistributedTensorConfig{ShardTensor{shard_dim_}};
+    }
 
 private:
     size_t num_devices_ = 0;
@@ -94,7 +98,7 @@ class ShardTensorTo2dMesh : public TensorToMesh {
         return tensor_shards;
     }
 
-    DistributedTensorConfig config() const override {
+    tt::tt_metal::DistributedTensorConfig config() const override {
         return DistributedTensorConfig{ShardTensor2D{ShardMesh{mesh_shape_.num_rows, mesh_shape_.num_cols}}};
     }
 
@@ -178,7 +182,8 @@ std::unique_ptr<MeshToTensor> concat_2d_mesh_to_tensor_composer(MeshDevice& mesh
 
 Tensor distribute_tensor(const Tensor& tensor, MeshDevice& mesh_device, TensorToMesh& mapper) {
     TT_FATAL(
-        tensor.storage_type() != StorageType::MULTI_DEVICE && tensor.storage_type() != StorageType::MULTI_DEVICE_HOST,
+        tensor.storage_type() != tt::tt_metal::StorageType::MULTI_DEVICE &&
+            tensor.storage_type() != tt::tt_metal::StorageType::MULTI_DEVICE_HOST,
         "TensorToMesh does not support multi-device or multi-device host tensors; got storage type: {}",
         tensor.storage_type());
     std::vector<Tensor> tensors = mapper.map(tensor);

diff --git a/ttnn/cpp/ttnn/distributed/distributed_tensor.hpp b/ttnn/cpp/ttnn/distributed/distributed_tensor.hpp
@@ -14,7 +14,7 @@ class TensorToMesh {
 public:
     virtual ~TensorToMesh() = default;
     virtual std::vector<Tensor> map(const Tensor& tensor) = 0;
-    virtual DistributedTensorConfig config() const = 0;
+    virtual tt::tt_metal::DistributedTensorConfig config() const = 0;
 };
 
 // Composer interface that aggregates a multi-device tensor into a host tensor.

diff --git a/ttnn/cpp/ttnn/events.hpp b/ttnn/cpp/ttnn/events.hpp
@@ -17,11 +17,14 @@ struct MultiDeviceEvent {
 // Single Device APIs
 std::shared_ptr<Event> create_event(Device* device);
 void record_event(
-    uint8_t cq_id, const std::shared_ptr<Event>& event, const std::vector<SubDeviceId>& sub_device_ids = {});
+    uint8_t cq_id,
+    const std::shared_ptr<Event>& event,
+    const std::vector<tt::tt_metal::SubDeviceId>& sub_device_ids = {});
 void wait_for_event(uint8_t cq_id, const std::shared_ptr<Event>& event);
 // Multi Device APIs
 MultiDeviceEvent create_event(MeshDevice* mesh_device);
-void record_event(uint8_t cq_id, const MultiDeviceEvent& event, const std::vector<SubDeviceId>& sub_device_ids = {});
+void record_event(
+    uint8_t cq_id, const MultiDeviceEvent& event, const std::vector<tt::tt_metal::SubDeviceId>& sub_device_ids = {});
 void wait_for_event(uint8_t cq_id, const MultiDeviceEvent& event);
 
 }  // namespace ttnn::events
diff --git a/ttnn/cpp/ttnn/global_semaphore.cpp b/ttnn/cpp/ttnn/global_semaphore.cpp
@@ -33,9 +33,9 @@ std::shared_ptr<GlobalSemaphore> create_global_semaphore(
     return global_semaphore;
 }
 
-DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore) {
+tt::tt_metal::DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore) {
     auto* device = global_semaphore->device();
-    DeviceAddr address = 0;
+    tt::tt_metal::DeviceAddr address = 0;
     device->push_work([&global_semaphore, &address] { address = global_semaphore->address(); }, /*blocking=*/true);
     return address;
 }
@@ -70,8 +70,8 @@ MultiDeviceGlobalSemaphore create_global_semaphore(
     }
     return multi_device_global_semaphore;
 }
-std::vector<DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore) {
-    std::vector<DeviceAddr> addresses(global_semaphore.global_semaphores.size());
+std::vector<tt::tt_metal::DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore) {
+    std::vector<tt::tt_metal::DeviceAddr> addresses(global_semaphore.global_semaphores.size());
     const auto& global_semaphores = global_semaphore.global_semaphores;
     for (uint32_t i = 0; i < global_semaphores.size(); ++i) {
         const auto& global_semaphore = global_semaphores[i];

diff --git a/ttnn/cpp/ttnn/global_semaphore.hpp b/ttnn/cpp/ttnn/global_semaphore.hpp
@@ -22,7 +22,7 @@ std::shared_ptr<GlobalSemaphore> create_global_semaphore(
     uint32_t initial_value,
     BufferType buffer_type = BufferType::L1,
     tt::stl::Span<const SubDeviceId> sub_device_ids = {});
-DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore);
+tt::tt_metal::DeviceAddr get_global_semaphore_address(const std::shared_ptr<GlobalSemaphore>& global_semaphore);
 void reset_global_semaphore_value(
     const std::shared_ptr<GlobalSemaphore>& global_semaphore,
     uint32_t reset_value,
@@ -35,7 +35,7 @@ MultiDeviceGlobalSemaphore create_global_semaphore(
     uint32_t initial_value,
     BufferType buffer_type = BufferType::L1,
     tt::stl::Span<const SubDeviceId> sub_device_ids = {});
-std::vector<DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore);
+std::vector<tt::tt_metal::DeviceAddr> get_global_semaphore_address(const MultiDeviceGlobalSemaphore& global_semaphore);
 void reset_global_semaphore_value(
     const MultiDeviceGlobalSemaphore& global_semaphore,
     uint32_t reset_value,

diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
@@ -72,7 +72,7 @@ Tensor optimized_conv_new(const Tensor& a, const Tensor &b, std::optional<const
     bool enable_subblock_padding,
     bool use_non_tile_height
 ) {
-    std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({a, b}))};
+    std::vector<Tensor> output_tensors = {Tensor(tt::tt_metal::operation::get_workers_for_op_output({a, b}))};
     operation::launch_op(
         [sliding_window_config, output_channels, groups, untilize_out, fuse_relu, parallelization_config, block_config, memory_config, dtype, input_tensor_shape, use_shallow_conv_variant, compute_kernel_config, enable_act_double_buffer, enable_weights_double_buffer, enable_split_reader, enable_subblock_padding, use_non_tile_height]
             (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {

diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp
@@ -43,14 +43,14 @@ struct OptimizedConvBlockConfig {
     uint32_t out_subblock_w_ntiles;
 };
 
-operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_new(const Tensor& a, const Tensor &b, const std::optional<const Tensor>& bias,
+tt::tt_metal::operation::ProgramWithCallbacks multi_core_optimized_conv_sharded_v2_new(const Tensor& a, const Tensor &b, const std::optional<const Tensor>& bias,
     const sliding_window::SlidingWindowConfig& sliding_window_config,
     uint32_t output_channels,
     uint32_t groups,
     bool untilize_out, bool fuse_relu,
     const OptimizedConvParallelizationConfig& parallelization_config,
     const OptimizedConvBlockConfig& block_config,
-    DataType dtype,
+    tt::tt_metal::DataType dtype,
     std::array<std::uint32_t, 4> input_tensor_shape,
     bool use_shallow_conv_variant,
     std::optional<const DeviceComputeKernelConfig> compute_kernel_config,
@@ -69,8 +69,8 @@ struct OptimizedConvNew {
     const uint32_t output_channels;
     const uint32_t groups;
     bool untilize_out, has_bias, fuse_relu;
-    MemoryConfig memory_config;
-    const DataType dtype;
+    tt::tt_metal::MemoryConfig memory_config;
+    const tt::tt_metal::DataType dtype;
     std::array<std::uint32_t, 4> input_tensor_shape; // For sharded input, input tensor shape is nonsense
     bool use_shallow_conv_variant;
     const DeviceComputeKernelConfig compute_kernel_config;
@@ -85,8 +85,8 @@ struct OptimizedConvNew {
         bool has_bias, bool fuse_relu,
         const OptimizedConvParallelizationConfig& p_config,
         const OptimizedConvBlockConfig& b_config,
-        MemoryConfig memory_config,
-        DataType dtype,
+        tt::tt_metal::MemoryConfig memory_config,
+        tt::tt_metal::DataType dtype,
         std::array<std::uint32_t, 4> input_tensor_shape, bool use_shallow_conv_variant,
         const DeviceComputeKernelConfig compute_kernel_config, bool enable_act_double_buffer, bool enable_weights_double_buffer, bool enable_split_reader, bool enable_subblock_padding, bool use_non_tile_height) :
             output_channels(output_channels),
@@ -109,9 +109,9 @@ struct OptimizedConvNew {
 
     void validate(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors) const;
     std::vector<TensorSpec> compute_output_specs(const std::vector<Tensor>& input_tensors) const;
-    operation::ProgramWithCallbacks create_program(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, std::vector<Tensor> &output_tensors) const;
+    tt::tt_metal::operation::ProgramWithCallbacks create_program(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, std::vector<Tensor> &output_tensors) const;
 
-    operation::OpPerformanceModel create_op_performance_model(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<Tensor> &output_tensors) const;
+    tt::tt_metal::operation::OpPerformanceModel create_op_performance_model(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<Tensor> &output_tensors) const;
 
     static constexpr auto attribute_names = std::make_tuple(
         "parallelization_config",
@@ -154,8 +154,8 @@ Tensor optimized_conv_new(const Tensor& a, const Tensor &b, std::optional<const
     bool untilize_out, bool fuse_relu,
     const OptimizedConvParallelizationConfig& parallelization_config,
     const OptimizedConvBlockConfig& block_config,
-    const MemoryConfig& memory_config,
-    DataType dtype,
+    const tt::tt_metal::MemoryConfig& memory_config,
+    tt::tt_metal::DataType dtype,
     std::array<std::uint32_t, 4> input_tensor_shape,
     bool use_shallow_conv_variant,
     const DeviceComputeKernelConfig& compute_kernel_config,

diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_sharded_program_factory.cpp
@@ -37,7 +37,7 @@ const uint32_t untilized_padded_out_cb = CBIndex::c_28;
 }  // namespace CMAKE_UNIQUE_NAMESPACE
 }  // namespace
 
-operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
+tt::tt_metal::operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
     tt_metal::Program& program,
     const Tensor& a,
     const Tensor& b,

diff --git a/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_width_sharded_program_factory.cpp b/ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op_width_sharded_program_factory.cpp
@@ -19,7 +19,7 @@ namespace conv2d {
 
 using namespace tt;
 
-operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
+tt::tt_metal::operation::ProgramWithCallbacks multi_core_optimized_conv_width_sharded_v2_impl(
     tt_metal::Program& program,
     const Tensor& a,
     const Tensor& b,