-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add rack-plane builders (fabric and host)
TODO: Scale out network modelling
- Loading branch information
Showing
4 changed files
with
230 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
"""RackPlaneFabric package | ||
Uses the infra_pb2 protobuf generated code | ||
to capture components, links, and connections of the | ||
RackPlane block diagram. | ||
""" | ||
|
||
from typing import Tuple | ||
|
||
from src.closfabric import ClosFabricSwitch | ||
|
||
if __package__ is None or __package__ == "": | ||
import generated.infra_pb2 as infra | ||
import builders as bld | ||
else: | ||
from .generated import infra_pb2 as infra | ||
from . import builders as bld | ||
|
||
|
||
class RackPlaneFabricBuilder(bld.FabricBuilder):
    """Fabric builder for a rack whose hosts attach to several switch planes.

    One "rack switch" device entry is registered, with a device count equal
    to the ``count`` of the host builder's ``port_up_component``, and a single
    ethernet link named ``eth`` is added to the fabric.
    """

    name: str = "rack plane fabric"
    description: str = "fabric that users multiple planes inside a rack"
    lowest_device: bld.DeviceBuilder = None

    def __init__(self, host_builder: bld.DeviceBuilder, host_count: int = 1):
        """Populate the fabric with its rack switches and the ``eth`` link.

        Args:
            host_builder: builder of the host device; its
                ``port_up_component.count`` sizes the switches.
            host_count: multiplier applied to that count to obtain the
                down-link count handed to each switch.
        """
        super().__init__(self.name)
        assert isinstance(host_builder, bld.DeviceBuilder)

        # Only the switch builder is kept; the returned device count is unused.
        switch_and_count = self._add_fabric_devices(
            host_builder, host_count, "rack switch"
        )
        self.lowest_device = switch_and_count[0]

        eth_link = infra.Link(name="eth", type=infra.LinkType.LINK_ETHERNET)
        self.fabric.links[eth_link.name].CopyFrom(eth_link)

    def _add_fabric_devices(
        self,
        host_builder: bld.DeviceBuilder,
        host_count: int,
        device_name: str,
    ) -> Tuple[bld.DeviceBuilder, int]:
        """Create the switch builder and register it with the fabric.

        Returns:
            A ``(switch_builder, switch_count)`` tuple.
        """
        # One rack switch per host scale-up NIC.
        nic_count = host_builder.port_up_component.count
        # The switch gets nic_count * host_count down links and no up links.
        switch = ClosFabricSwitch(device_name, int(nic_count * host_count), 0)
        self._add_device(switch, nic_count)
        return (switch, nic_count)

    def _add_device(
        self, package_builder: bld.DeviceBuilder, device_count: int
    ) -> None:
        """Store ``package_builder``'s device in the fabric, keyed by its name.

        Does nothing when ``package_builder`` is ``None``.
        """
        if package_builder is None:
            return
        counted_device = infra.DeviceCount(
            count=device_count,
            device=package_builder.device,
        )
        self.fabric.devices[package_builder.device.name].CopyFrom(counted_device)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
"""RackPlaneHost package | ||
Uses the infra_pb2 protobuf generated code | ||
to capture components, links, and connections of the | ||
RackPlane block diagram. | ||
""" | ||
|
||
if __package__ is None or __package__ == "": | ||
import generated.infra_pb2 as infra | ||
import builders as bld | ||
else: | ||
from .generated import infra_pb2 as infra | ||
from . import builders as bld | ||
|
||
|
||
class RackPlaneHostBuilder(bld.HostBuilder):
    """Builds the device description of a rack-plane host.

    The host device holds an ``npu`` component and a ``scale-up-nic``
    component, and every NPU is linked to every scale-up NIC. The scale-up
    NIC component is what ``port_up_component`` returns.

    Scale-out NICs are not modelled yet (see the TODO notes in ``__init__``).
    """

    name = "rack plane host"
    description = "a host with dedicated scale up and scale out NICs"

    def __init__(
        self, npu_count: int, scale_up_nic_count: int, scale_out_nic_count: int
    ):
        """Create the host device, its components and its component links.

        Args:
            npu_count: ``count`` of the ``npu`` component.
            scale_up_nic_count: ``count`` of the ``scale-up-nic`` component.
            scale_out_nic_count: accepted so the signature is stable, but
                currently unused because scale-out NICs are still a TODO.
        """
        # Zero-argument super() is bound to this instance. The previous
        # one-argument form, super(RackPlaneHostBuilder), is an unbound super
        # object, so the base-class initializer was never executed.
        super().__init__()

        # 1. Add components
        npu = infra.Component(name="npu", count=npu_count, npu=infra.Npu())
        scale_up_nic = infra.Component(
            name="scale-up-nic", count=scale_up_nic_count, nic=infra.Nic()
        )
        self._port_component = scale_up_nic

        # TODO: Scale OUT NICs
        # scale_out_nic = infra.Component(
        #     name="scale-out-nic", count=scale_out_nic_count, nic=infra.Nic()
        # )

        # 2. Add device
        self._device = infra.Device(
            name=self.name,
            components={
                npu.name: npu,
                scale_up_nic.name: scale_up_nic,
                # scale_out_nic.name: scale_out_nic,
            },
        )

        # 3. Add component links
        # scale UP NICs to NPU connections: full mesh of NPUs x scale-up NICs
        for c1_index in range(npu.count):
            for c2_index in range(scale_up_nic.count):
                self._add_component_link(
                    npu.name,
                    c1_index,
                    f"{npu.name}.{c1_index}.to.{scale_up_nic.name}.{c2_index}",
                    scale_up_nic.name,
                    c2_index,
                )

        # TODO: scale OUT NICs to NPU connections.
        # The earlier loop under this heading iterated the scale-up NICs a
        # second time (with a stray "c1_index=" in the link name), which only
        # duplicated the links above. Add the real loop once the scale-out
        # NIC component is part of the device.

    @property
    def port_up_component(self) -> infra.Component:
        """Return the scale-up NIC component stored by ``__init__``."""
        return self._port_component
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
"""rack plane related unit tests""" | ||
|
||
import pytest | ||
|
||
if __package__ is None or __package__ == "": | ||
from src.generated import infra_pb2 | ||
from src.rack_plane_host import RackPlaneHostBuilder | ||
from src.rack_plane_fabric import RackPlaneFabricBuilder | ||
from src.infrastructure import Infrastructure | ||
else: | ||
from .generated import infra_pb2 | ||
from keysight_chakra.rack_plane_host import RackPlaneHostBuilder | ||
from keysight_chakra.rack_plane_fabric import RackPlaneFabricBuilder | ||
from keysight_chakra.infrastructure import Infrastructure | ||
|
||
|
||
@pytest.mark.parametrize("host_count", [2, 3, 4, 8])
@pytest.mark.parametrize("sup_nic_count", [2, 3, 4])
def test_rack_plane_fabric_and_host(host_count: int, sup_nic_count: int):
    """Build a rack-plane infrastructure and check its connection count.

    Runs for every combination of host count and scale-up NIC count.
    """
    host = RackPlaneHostBuilder(
        npu_count=1,
        scale_up_nic_count=sup_nic_count,
        scale_out_nic_count=1,
    )
    fabric = RackPlaneFabricBuilder(host_builder=host)
    built = Infrastructure(
        host_device=host,
        host_devices=host_count,
        fabric=fabric,
        assignment_scheme="ROUND_ROBIN",
    ).infrastructure

    assert built is not None
    # Loose check: one host-to-rack-switch connection per scale-up NIC
    # on every host.
    expected_connections = host_count * sup_nic_count
    assert len(built.connections) == expected_connections
|
||
|
||
def test_rack_plane_fabric_and_host_detailed():
    """Check every field of each connection for a 2-host, 2-NIC rack plane."""
    sup_nic_count = 2
    host_count = 2
    host = RackPlaneHostBuilder(
        npu_count=1,
        scale_up_nic_count=sup_nic_count,
        scale_out_nic_count=1,
    )
    fabric = RackPlaneFabricBuilder(host_builder=host)
    infrastructure = Infrastructure(
        host_device=host,
        host_devices=host_count,
        fabric=fabric,
        assignment_scheme="ROUND_ROBIN",
    ).infrastructure

    assert infrastructure is not None
    assert len(infrastructure.connections) == host_count * sup_nic_count

    def check_connection(
        dev_conn: infra_pb2.DeviceConnection,
        d1_index: int,
        c1_index: int,
        d2_index: int,
        c2_index: int,
    ):
        """Assert the endpoint names, link name and indices of one connection."""
        link = dev_conn.link
        wanted = {
            "d1": "rack plane host",
            "c1": "scale-up-nic",
            "d2": "rack switch",
            "c2": "port-down",
            "link": "eth",
            "d1_index": d1_index,
            "c1_index": c1_index,
            "d2_index": d2_index,
            "c2_index": c2_index,
        }
        for field, value in wanted.items():
            assert getattr(link, field) == value

    # Expected (d1_index, c1_index, d2_index, c2_index) per connection, where
    # d1/c1 are the host and its scale-up NIC and d2/c2 are the rack switch
    # and its down port. In every row the NIC index equals the switch index.
    expected_indices = [
        # host 0
        (0, 0, 0, 0),
        (0, 1, 1, 0),
        # host 1
        (1, 0, 0, 1),
        (1, 1, 1, 1),
    ]
    for position, indices in enumerate(expected_indices):
        check_connection(infrastructure.connections[position], *indices)