-
Notifications
You must be signed in to change notification settings - Fork 4
deBruijn Blueprint #51
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Monjistha99
wants to merge
5
commits into
main
Choose a base branch
from
debruijn-blueprint
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
12174f6
added de bruijn blueprints
Monjistha99 6654216
Update test_multi_host_debruijn_fabric.py
Monjistha99 7511be0
Update test_hybrid_debruijn_fabric.py
Monjistha99 959d396
Updated DeBruijn fabric blueprints and tests
Monjistha99 feb9e4f
updated hybrid and multihost debruijn
Monjistha99 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
134 changes: 134 additions & 0 deletions
134
src/infragraph/blueprints/fabrics/hybrid_debruijn_fabric.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| from infragraph import * | ||
| from infragraph.infragraph_service import InfraGraphService | ||
| import itertools | ||
|
|
||
class HybridDeBruijnFabric(Infrastructure):
    """
    Hybrid of a De Bruijn fabric and an access layer of rack switches.

    One fabric switch and one access switch are created per De Bruijn node;
    both are instances of the same ``switch`` device. Hosts attach to the
    access switches, and each access switch has a single uplink to the
    fabric switch with the same index.

    Inputs:
        switch : switch device (used for fabric and access switches)
        server : host device
        order  : order of the De Bruijn graph (label length), must be >= 1

    Derived:
        switch_port.count = switch radix (must be divisible by 8)
        degree            = switch_port.count // 8
        fabric switches   = degree ** order

    Fabric Switch Ports:
        0..degree-1                      primary outgoing   (indexed by appended symbol)
        degree..2*degree-1               redundant outgoing (indexed by appended symbol)
        2*degree..3*degree-1             primary incoming   (indexed by dropped symbol)
        3*degree..4*degree-1             redundant incoming (indexed by dropped symbol)
        4*degree                         access switch uplink
        4*degree+1..switch_port.count-1  unused

    Access Switch Ports:
        0..(switch_port.count//2 - 1)    hosts
        switch_port.count//2             uplink to the fabric switch
        remaining                        unused

    Raises:
        ValueError: if ``order`` < 1, the switch radix is not divisible by 8,
            the switch has fewer than 8 ports, or the host NIC count does not
            divide the number of host ports.
    """

    def __init__(self, switch: Device, server: Device, order: int):
        super().__init__(
            name="hybrid-debruijn-fabric",
            description=f"DeBruijn Fabric With Rack Switches(order={order})",
        )

        # order=0 would yield a single empty label whose successors do not
        # exist, which previously surfaced as an opaque KeyError.
        if order < 1:
            raise ValueError("DeBruijn order must be at least 1")

        switch_port = InfraGraphService.get_component(switch, Component.PORT)
        host_nic = InfraGraphService.get_component(server, Component.NIC)

        # The radix is split 2 x 2 x 2: fabric vs. host/access half, then
        # outgoing vs. incoming, then primary vs. redundant.
        if switch_port.count % 8 != 0:
            raise ValueError("Switch radix must be divisible by 8")

        # degree of graph = number of successor (and predecessor) nodes
        degree = switch_port.count // 8
        host_ports = switch_port.count // 2

        if degree < 1:
            raise ValueError("Not enough switch ports")

        if host_ports % host_nic.count != 0:
            raise ValueError("Host NIC count must divide available host ports")

        # Each access switch dedicates half of its ports to hosts;
        # every host consumes one port per NIC.
        hosts_per_access_switch = host_ports // host_nic.count

        self.devices.append(switch)
        self.devices.append(server)

        # Build De Bruijn node labels as tuples of integer symbols.
        # Tuples (rather than joined digit strings) keep labels unambiguous
        # when degree > 10, where "1"+"10" and "11"+"0" would collide.
        # itertools.product yields labels in lexicographic order, so the
        # switch index of a label is unchanged for degree <= 10.
        symbols = range(degree)
        nodes = list(itertools.product(symbols, repeat=order))
        num_switches = len(nodes)
        node_index = {node: i for i, node in enumerate(nodes)}

        # Create one fabric switch and one access switch per De Bruijn node
        fabric_switches = self.instances.add(name="fabric_switch", device=switch.name, count=num_switches)
        access_switches = self.instances.add(name="access_switch", device=switch.name, count=num_switches)

        # Create hosts for every access/rack switch
        total_hosts = num_switches * hosts_per_access_switch
        hosts = self.instances.add(name="host", device=server.name, count=total_hosts)

        # Link types:
        #   fabric link   - fabric switch to fabric switch
        #   access uplink - access switch to fabric switch
        #   host link     - host NIC to access switch
        fabric_link = self.links.add(name="fabric-link", description="DeBruijn fabric connectivity")
        fabric_link.physical.bandwidth.gigabits_per_second = 400
        access_link = self.links.add(name="access-uplink", description="Access switch to fabric switch")
        access_link.physical.bandwidth.gigabits_per_second = 200
        host_link = self.links.add(name="host-link", description="Host to access switch")
        host_link.physical.bandwidth.gigabits_per_second = 100

        # De Bruijn fabric edges: shift the label left and append a symbol.
        # The outgoing port is selected by the appended symbol. The incoming
        # port is selected by the DROPPED leading symbol: all predecessors of
        # a node append the same symbol, so indexing the incoming port by the
        # appended symbol would put every predecessor on one port.
        for node in nodes:
            src_idx = node_index[node]
            dropped = node[0]
            for appended in symbols:
                dst_idx = node_index[node[1:] + (appended,)]

                # port_base 0 -> primary link, port_base degree -> redundant link
                for port_base in (0, degree):
                    edge = self.edges.add(scheme=InfrastructureEdge.ONE2ONE, link=fabric_link.name)
                    edge.ep1.instance = f"{fabric_switches.name}[{src_idx}]"
                    edge.ep1.component = f"{switch_port.name}[{port_base + appended}]"
                    edge.ep2.instance = f"{fabric_switches.name}[{dst_idx}]"
                    edge.ep2.component = f"{switch_port.name}[{2 * degree + port_base + dropped}]"

        # Access switch to fabric switch edges.
        # The access-side uplink uses the first port above the host range so
        # it does not share port 0 with the first host NIC.
        fabric_uplink_port = 4 * degree
        access_uplink_port = host_ports
        for idx in range(num_switches):
            edge = self.edges.add(scheme=InfrastructureEdge.ONE2ONE, link=access_link.name)
            edge.ep1.instance = f"{access_switches.name}[{idx}]"
            edge.ep1.component = f"{switch_port.name}[{access_uplink_port}]"
            edge.ep2.instance = f"{fabric_switches.name}[{idx}]"
            edge.ep2.component = f"{switch_port.name}[{fabric_uplink_port}]"

        # Attach hosts to access switches: host h on a switch occupies ports
        # h*nic_count .. h*nic_count + nic_count - 1.
        host_index = 0
        for sw_idx in range(num_switches):
            for h in range(hosts_per_access_switch):
                for nic in range(host_nic.count):
                    port_index = h * host_nic.count + nic
                    edge = self.edges.add(scheme=InfrastructureEdge.ONE2ONE, link=host_link.name)
                    edge.ep1.instance = f"{hosts.name}[{host_index}]"
                    edge.ep1.component = f"{host_nic.name}[{nic}]"
                    edge.ep2.instance = f"{access_switches.name}[{sw_idx}]"
                    edge.ep2.component = f"{switch_port.name}[{port_index}]"
                host_index += 1
|
|
||
104 changes: 104 additions & 0 deletions
104
src/infragraph/blueprints/fabrics/multi_host_debruijn_fabric.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| from infragraph import * | ||
| from infragraph.infragraph_service import InfraGraphService | ||
| import itertools | ||
|
|
||
|
|
||
class MultiHostDeBruijnFabric(Infrastructure):
    """
    De Bruijn fabric with multiple hosts per switch and redundant fabric links.

    Hosts attach directly to the fabric switches; there is no separate
    access layer.

    Inputs
        switch : fabric switch device
        server : host device
        order  : De Bruijn order (label length), must be >= 1

    Derived
        d               = switch_port.count // 8
        host ports      = switch_port.count // 2
        fabric switches = d ** order

    Ports
        0..d-1                             primary outgoing   (indexed by appended symbol)
        d..2d-1                            redundant outgoing (indexed by appended symbol)
        2d..3d-1                           primary incoming   (indexed by dropped symbol)
        3d..4d-1                           redundant incoming (indexed by dropped symbol)
        4d..4d + switch_port.count//2 - 1  host ports

    Raises
        ValueError: if ``order`` < 1, the switch has fewer than 8 ports, or
            the host NIC count does not divide the number of host ports.
    """

    def __init__(self, switch: Device, server: Device, order: int):
        super().__init__(
            name="multi-host-redundant-debruijn",
            description=f"DeBruijn Fabric (k={order})",
        )

        # order=0 would yield a single empty label whose successors do not
        # exist, which previously surfaced as an opaque KeyError.
        if order < 1:
            raise ValueError("DeBruijn order must be at least 1")

        switch_port = InfraGraphService.get_component(switch, Component.PORT)
        host_nic = InfraGraphService.get_component(server, Component.NIC)

        degree = switch_port.count // 8
        host_ports = switch_port.count // 2

        if degree < 1:
            raise ValueError("Not enough switch ports")

        if host_ports % host_nic.count != 0:
            raise ValueError(
                f"Host NICs ({host_nic.count}) must divide available host ports ({host_ports})"
            )

        # Each fabric switch dedicates half of its ports to hosts;
        # every host consumes one port per NIC.
        hosts_per_switch = host_ports // host_nic.count

        self.devices.append(switch)
        self.devices.append(server)

        # Build De Bruijn node labels as tuples of integer symbols.
        # Tuples (rather than joined digit strings) keep labels unambiguous
        # when degree > 10, where "1"+"10" and "11"+"0" would collide.
        # itertools.product yields labels in lexicographic order, so the
        # switch index of a label is unchanged for degree <= 10.
        symbols = range(degree)
        nodes = list(itertools.product(symbols, repeat=order))
        num_switches = len(nodes)
        node_index = {node: i for i, node in enumerate(nodes)}

        # Create fabric switches and hosts
        switches = self.instances.add(name="switch", device=switch.name, count=num_switches)
        hosts = self.instances.add(name="host", device=server.name, count=num_switches * hosts_per_switch)

        # Link types:
        #   fabric link - fabric switch to fabric switch
        #   host link   - host NIC to fabric switch
        fabric_link = self.links.add(name="fabric-link", description="DeBruijn connectivity")
        fabric_link.physical.bandwidth.gigabits_per_second = 400
        host_link = self.links.add(name="host-link", description="Host to switch connectivity")
        host_link.physical.bandwidth.gigabits_per_second = 100

        # De Bruijn fabric edges: shift the label left and append a symbol.
        # The outgoing port is selected by the appended symbol. The incoming
        # port is selected by the DROPPED leading symbol: all predecessors of
        # a node append the same symbol, so indexing the incoming port by the
        # appended symbol would put every predecessor on one port.
        for node in nodes:
            src_idx = node_index[node]
            dropped = node[0]
            for appended in symbols:
                dst_idx = node_index[node[1:] + (appended,)]

                # port_base 0 -> primary link, port_base degree -> redundant link
                for port_base in (0, degree):
                    edge = self.edges.add(scheme=InfrastructureEdge.ONE2ONE, link=fabric_link.name)
                    edge.ep1.instance = f"{switches.name}[{src_idx}]"
                    edge.ep1.component = f"{switch_port.name}[{port_base + appended}]"
                    edge.ep2.instance = f"{switches.name}[{dst_idx}]"
                    edge.ep2.component = f"{switch_port.name}[{2 * degree + port_base + dropped}]"

        # Attach hosts to the fabric switches: host ports start right after
        # the four fabric port groups.
        host_port_start = 4 * degree
        host_global_idx = 0
        for sw_idx in range(num_switches):
            for h in range(hosts_per_switch):
                for nic in range(host_nic.count):
                    port_offset = h * host_nic.count + nic
                    edge = self.edges.add(scheme=InfrastructureEdge.ONE2ONE, link=host_link.name)
                    edge.ep1.instance = f"{hosts.name}[{host_global_idx}]"
                    edge.ep1.component = f"{host_nic.name}[{nic}]"
                    edge.ep2.instance = f"{switches.name}[{sw_idx}]"
                    edge.ep2.component = f"{switch_port.name}[{host_port_start + port_offset}]"
                host_global_idx += 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| from infragraph.infragraph_service import InfraGraphService | ||
| from infragraph.blueprints.devices.generic.server import Server | ||
| from infragraph.blueprints.devices.generic.generic_switch import Switch | ||
| from infragraph.blueprints.fabrics.hybrid_debruijn_fabric import HybridDeBruijnFabric | ||
| from infragraph.blueprints.devices.nvidia.dgx import NvidiaDGX | ||
| import networkx | ||
| import pytest | ||
|
|
||
# DGX profile names; each one is handed to NvidiaDGX by the parametrized DGX test.
DGX_PROFILES = ["dgx1", "dgx2", "dgx_a100", "dgx_h100", "dgx_gb200"]
@pytest.mark.asyncio
async def test_hybrid_debruijn_fabric():
    """
    Generate an order-3 hybrid De Bruijn fabric from a 16-port switch and a
    generic server, load it into the service and render the resulting graph.
    """
    switch = Switch(port_count=16)
    server = Server()
    fabric = HybridDeBruijnFabric(switch, server, 3)

    service = InfraGraphService()
    service.set_graph(fabric)

    graph = service.get_networkx_graph()
    # write_network_text writes to stdout itself and returns None, so
    # wrapping it in print() only appended a stray "None" line.
    networkx.write_network_text(graph, vertical_chains=True)
|
|
||
@pytest.mark.asyncio
@pytest.mark.parametrize("dgx_profile", DGX_PROFILES)
async def test_hybrid_debruijn_fabric_with_dgx(dgx_profile):
    """
    Generate an order-3 hybrid De Bruijn fabric from a 16-port switch for
    each DGX profile in DGX_PROFILES, load it into the service and render
    the resulting graph.
    """
    switch = Switch(port_count=16)
    dgx = NvidiaDGX(dgx_profile)
    fabric = HybridDeBruijnFabric(switch, dgx, 3)

    service = InfraGraphService()
    service.set_graph(fabric)

    graph = service.get_networkx_graph()
    # write_network_text writes to stdout itself and returns None, so
    # wrapping it in print() only appended a stray "None" line.
    networkx.write_network_text(graph, vertical_chains=True)
|
|
||
|
|
||
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script fails
    # (non-zero status) when a test fails.
    raise SystemExit(pytest.main(["-s", __file__]))
51 changes: 51 additions & 0 deletions
51
src/tests/test_blueprints/test_multi_host_debruijn_fabric.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| from infragraph.infragraph_service import InfraGraphService | ||
| from infragraph.blueprints.devices.generic.server import Server | ||
| from infragraph.blueprints.devices.generic.generic_switch import Switch | ||
| from infragraph.blueprints.fabrics.multi_host_debruijn_fabric import MultiHostDeBruijnFabric | ||
| from infragraph.blueprints.devices.nvidia.dgx import NvidiaDGX | ||
| import networkx | ||
| import pytest | ||
|
|
||
# DGX profile names; each one is handed to NvidiaDGX by the parametrized DGX test.
DGX_PROFILES = ["dgx1", "dgx2", "dgx_a100", "dgx_h100", "dgx_gb200"]
@pytest.mark.asyncio
async def test_debruijn_multi_host_fabric():
    """
    Generate an order-3 De Bruijn fabric with multiple hosts per switch from
    a 16-port switch and a generic server, load it into the service and
    render the resulting graph.
    """
    switch = Switch(port_count=16)
    server = Server()
    fabric = MultiHostDeBruijnFabric(switch, server, 3)

    service = InfraGraphService()
    service.set_graph(fabric)

    graph = service.get_networkx_graph()
    # write_network_text writes to stdout itself and returns None, so
    # wrapping it in print() only appended a stray "None" line.
    networkx.write_network_text(graph, vertical_chains=True)
|
|
||
@pytest.mark.asyncio
@pytest.mark.parametrize("dgx_profile", DGX_PROFILES)
async def test_debruijn_multi_host_fabric_with_dgx(dgx_profile):
    """
    Generate an order-3 multi-host De Bruijn fabric from a 16-port switch
    for each DGX profile in DGX_PROFILES, load it into the service and
    render the resulting graph.
    """
    switch = Switch(port_count=16)
    dgx = NvidiaDGX(dgx_profile)
    fabric = MultiHostDeBruijnFabric(switch, dgx, 3)

    service = InfraGraphService()
    service.set_graph(fabric)

    graph = service.get_networkx_graph()
    # write_network_text writes to stdout itself and returns None, so
    # wrapping it in print() only appended a stray "None" line.
    networkx.write_network_text(graph, vertical_chains=True)
|
|
||
|
|
||
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script fails
    # (non-zero status) when a test fails.
    raise SystemExit(pytest.main(["-s", __file__]))
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.