#0: Update device creation functions to use num_command_queues instead of num_hw_cqs to match mesh_device creation functions
tt-aho committed Sep 11, 2024
1 parent ae01ad7 commit 74267d4
Showing 10 changed files with 26 additions and 22 deletions.
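
In short: the Python and C++ device creation entry points now take `num_command_queues` where they previously took `num_hw_cqs`, matching the mesh_device creation functions. A minimal before/after sketch at the Python API level (the device ID and queue count are illustrative, and default values are assumed for the remaining arguments):

```python
import ttnn

# Before this commit the parameter was named num_hw_cqs:
# device = ttnn.device.CreateDevice(device_id=0, num_hw_cqs=2)

# After this commit the same setting is passed as num_command_queues:
device = ttnn.device.CreateDevice(device_id=0, num_command_queues=2)

# ... enqueue work on command queues 0 and 1 ...

ttnn.device.CloseDevice(device)
```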
2 changes: 1 addition & 1 deletion README.md
@@ -64,7 +64,7 @@
For the latest model updates and features, please see [MODEL_UPDATES.md](models/MODEL_UPDATES.md)

## TT-NN Tech Reports
-- [Advanced Performance Optimizations for Models](./tech_reports/AdvancedPerformanceOperationsForModels/AdvancedPerformanceOptimizationsForModels.md) (updated Sept 8th)
+- [Advanced Performance Optimizations for Models](./tech_reports/AdvancedPerformanceOperationsForModels/AdvancedPerformanceOptimizationsForModels.md) (updated Sept 11th)
- [Programming Mesh of Devices](./tech_reports/Programming%20Mesh%20of%20Devices/Programming%20Mesh%20of%20Devices%20with%20TT-NN.md) (updated Sept 9th)
---

@@ -71,7 +71,7 @@ def test_perf_trace(

@run_for_grayskull()
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_hw_cqs": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_command_queues": 2}], indirect=True)
@pytest.mark.parametrize(
"batch_size, expected_inference_time, expected_compile_time",
((20, 0.0100, 19),),
@@ -99,7 +99,7 @@ def test_perf_2cqs(
@run_for_grayskull()
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
"device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224, "num_hw_cqs": 2}], indirect=True
"device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224, "num_command_queues": 2}], indirect=True
)
@pytest.mark.parametrize(
"batch_size, expected_inference_time, expected_compile_time",
@@ -52,7 +52,7 @@ def test_run_resnet50_trace_inference(


@run_for_grayskull()
@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_hw_cqs": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_command_queues": 2}], indirect=True)
@pytest.mark.parametrize(
"batch_size, act_dtype, weight_dtype, math_fidelity",
((20, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
@@ -65,7 +65,7 @@ def test_run_resnet50_2cqs_inference(

@run_for_grayskull()
@pytest.mark.parametrize(
"device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224, "num_hw_cqs": 2}], indirect=True
"device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224, "num_command_queues": 2}], indirect=True
)
@pytest.mark.parametrize(
"batch_size, act_dtype, weight_dtype, math_fidelity",
@@ -70,7 +70,7 @@ def test_perf_trace(

@run_for_wormhole_b0()
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_hw_cqs": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_command_queues": 2}], indirect=True)
@pytest.mark.parametrize(
"batch_size, expected_inference_time, expected_compile_time",
((16, 0.0070, 26),),
@@ -98,7 +98,7 @@ def test_perf_2cqs(
@run_for_wormhole_b0()
@pytest.mark.models_performance_bare_metal
@pytest.mark.parametrize(
"device_params", [{"l1_small_size": 32768, "num_hw_cqs": 2, "trace_region_size": 1332224}], indirect=True
"device_params", [{"l1_small_size": 32768, "num_command_queues": 2, "trace_region_size": 1332224}], indirect=True
)
@pytest.mark.parametrize(
"batch_size, expected_inference_time, expected_compile_time",
@@ -54,7 +54,7 @@ def test_run_resnet50_trace_inference(


@run_for_wormhole_b0()
@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "num_hw_cqs": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "num_command_queues": 2}], indirect=True)
@pytest.mark.parametrize(
"batch_size, act_dtype, weight_dtype, math_fidelity",
((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
@@ -67,7 +67,7 @@ def test_run_resnet50_2cqs_inference(

@run_for_wormhole_b0()
@pytest.mark.parametrize(
"device_params", [{"l1_small_size": 24576, "trace_region_size": 800768, "num_hw_cqs": 2}], indirect=True
"device_params", [{"l1_small_size": 24576, "trace_region_size": 800768, "num_command_queues": 2}], indirect=True
)
@pytest.mark.parametrize(
"batch_size, act_dtype, weight_dtype, math_fidelity",
@@ -156,7 +156,7 @@ Using a second command queue only for writes enables us to eliminate the gap bet

In order to use multiple command queues, we need to be familiar with the following apis:

-* `num_hw_cqs`/`num_command_queues` (Currently dependent on whether we are running with single or multi device fixture)
+* `num_command_queues`

This is a parameter to the device creation api, and sets how many command queues to create the device with. The default is one, and the max is two. In pytest, we can pass this using the `device_params` fixture:
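
(The fixture example from the tech report is truncated in this view; as an illustration, here is a minimal sketch of passing `num_command_queues` through `device_params`, mirroring the test changes in this commit. The test name and body are hypothetical.)

```python
import pytest


@pytest.mark.parametrize("device_params", [{"num_command_queues": 2}], indirect=True)
def test_with_two_command_queues(device):
    # The `device` fixture is created with two command queues, so work can be
    # enqueued on command queue 0 and command queue 1 independently.
    ...
```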

@@ -225,7 +225,7 @@ def test_bert_linear_1cq_initialized(
)

@pytest.mark.parametrize("cq_id", [0])
@pytest.mark.parametrize("device_params", [{"trace_region_size": 34816, "num_hw_cqs": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"trace_region_size": 34816, "num_command_queues": 2}], indirect=True)
def test_bert_linear_2cqs_initialized(
self,
device,
4 changes: 2 additions & 2 deletions tests/ttnn/unit_tests/test_device_synchronize.py
@@ -15,7 +15,7 @@ def is_eth_dispatch():


@pytest.mark.skipif(is_wormhole_b0() and not is_eth_dispatch(), reason="Requires eth dispatch to run on WH")
@pytest.mark.parametrize("device_params", [{"num_hw_cqs": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"num_command_queues": 2}], indirect=True)
def test_read_write_full_synchronize(device):
zeros = torch.zeros([1, 1, 65536, 2048]).bfloat16()
input = torch.randn([1, 1, 65536, 2048]).bfloat16()
@@ -48,7 +48,7 @@ def test_read_write_full_synchronize(


@pytest.mark.skipif(is_wormhole_b0() and not is_eth_dispatch(), reason="Requires eth dispatch to run on WH")
@pytest.mark.parametrize("device_params", [{"num_hw_cqs": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"num_command_queues": 2}], indirect=True)
def test_read_write_cq_synchronize(device):
zeros = torch.zeros([1, 1, 65536, 2048]).bfloat16()
input = torch.randn([1, 1, 65536, 2048]).bfloat16()
10 changes: 5 additions & 5 deletions ttnn/cpp/pybind11/device.cpp
@@ -132,7 +132,7 @@ void device_module(py::module &m_device) {

m_device.def(
"CreateDevice",
-[](int device_id, uint8_t num_hw_cqs, size_t l1_small_size, size_t trace_region_size, tt::tt_metal::DispatchCoreType dispatch_core_type) { return tt::tt_metal::CreateDevice(device_id, num_hw_cqs, l1_small_size, trace_region_size, dispatch_core_type); },
+[](int device_id, uint8_t num_command_queues, size_t l1_small_size, size_t trace_region_size, tt::tt_metal::DispatchCoreType dispatch_core_type) { return tt::tt_metal::CreateDevice(device_id, num_command_queues, l1_small_size, trace_region_size, dispatch_core_type); },
R"doc(
Creates an instance of TT device.
@@ -143,14 +143,14 @@ void device_module(py::module &m_device) {
+------------------+------------------------+---------------------+------------------------------+----------+
)doc",
py::arg("device_id"),
py::arg("num_hw_cqs") = 1,
py::arg("num_command_queues") = 1,
py::arg("l1_small_size") = DEFAULT_L1_SMALL_SIZE,
py::arg("trace_region_size") = DEFAULT_TRACE_REGION_SIZE,
py::arg("dispatch_core_type") = tt::tt_metal::DispatchCoreType::WORKER);
m_device.def(
"CreateDevices",
[](std::vector<int> device_ids, uint8_t num_hw_cqs, size_t l1_small_size, size_t trace_region_size, tt::tt_metal::DispatchCoreType dispatch_core_type) {
return tt::tt_metal::detail::CreateDevices(device_ids, num_hw_cqs, l1_small_size, trace_region_size, dispatch_core_type);
[](std::vector<int> device_ids, uint8_t num_command_queues, size_t l1_small_size, size_t trace_region_size, tt::tt_metal::DispatchCoreType dispatch_core_type) {
return tt::tt_metal::detail::CreateDevices(device_ids, num_command_queues, l1_small_size, trace_region_size, dispatch_core_type);
},
R"doc(
Creates an instance of TT device.
@@ -162,7 +162,7 @@ void device_module(py::module &m_device) {
+------------------+------------------------+---------------------+------------------------------+----------+
)doc",
py::arg("device_ids"),
py::arg("num_hw_cqs") = 1,
py::arg("num_command_queues") = 1,
py::arg("l1_small_size") = DEFAULT_L1_SMALL_SIZE,
py::arg("trace_region_size") = DEFAULT_TRACE_REGION_SIZE,
py::arg("dispatch_core_type") = tt::tt_metal::DispatchCoreType::WORKER);
12 changes: 8 additions & 4 deletions ttnn/ttnn/device.py
@@ -90,22 +90,26 @@ def GetNumPCIeDevices():

def CreateDevice(
device_id: int,
-num_hw_cqs: int = 1,
+num_command_queues: int = 1,
l1_small_size: int = ttnn._ttnn.device.DEFAULT_L1_SMALL_SIZE,
trace_region_size: int = ttnn._ttnn.device.DEFAULT_TRACE_REGION_SIZE,
dispatch_core_type: int = DispatchCoreType.WORKER,
):
-return ttnn._ttnn.device.CreateDevice(device_id, num_hw_cqs, l1_small_size, trace_region_size, dispatch_core_type)
+return ttnn._ttnn.device.CreateDevice(
+    device_id, num_command_queues, l1_small_size, trace_region_size, dispatch_core_type
+)


def CreateDevices(
device_ids: List[int],
-num_hw_cqs: int = 1,
+num_command_queues: int = 1,
l1_small_size: int = ttnn._ttnn.device.DEFAULT_L1_SMALL_SIZE,
trace_region_size: int = ttnn._ttnn.device.DEFAULT_TRACE_REGION_SIZE,
dispatch_core_type: int = DispatchCoreType.WORKER,
):
-return ttnn._ttnn.device.CreateDevices(device_ids, num_hw_cqs, l1_small_size, trace_region_size, dispatch_core_type)
+return ttnn._ttnn.device.CreateDevices(
+    device_ids, num_command_queues, l1_small_size, trace_region_size, dispatch_core_type
+)


def CloseDevice(device):
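
Putting the rename together, a minimal usage sketch of the updated `CreateDevices` wrapper (the device IDs are illustrative and assume two locally available devices; closing the devices is omitted):

```python
import ttnn

# CreateDevices forwards num_command_queues to the underlying
# ttnn._ttnn.device.CreateDevices binding, as shown in the diff above.
devices = ttnn.device.CreateDevices(device_ids=[0, 1], num_command_queues=2)
```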
