From c9d523601bbc382d1cc3f9d357026122f207b68b Mon Sep 17 00:00:00 2001 From: John Starks Date: Wed, 13 May 2026 21:58:16 +0000 Subject: [PATCH 1/2] vmotherboard, pcie: add Root Complex Integrated Endpoint (RCiEP) infra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some PCIe devices — most notably an AMD IOMMU — live on the start bus of a root complex as Type 0 PCI functions alongside (not behind) root ports. The existing PCIe switch infra only knew how to attach devices behind downstream ports, so there was no way to wire up an RCiEP. Teach the generic root complex (GenericPcieRootComplex) to hold a map of RCiEPs indexed by combined device/function, and route ECAM accesses on the start bus that don't hit a root port through to a registered RCiEP. Also let new() take a start_device parameter so callers can reserve low device numbers on the start bus for RCiEPs (e.g., put the IOMMU at device 0 and start root ports at device 1). Plumb a matching path through the vmotherboard builder: an on_pcie_root_complex(enumerator_id, device, function) placement method, a register_static_pcie_rciep services helper, a WeakMutexPcieRciepEntry type, an rcieps list on BusResolverWeakMutexPcie, and an add_rciep method on the RegisterWeakMutexPcie trait (with a default impl that returns a new RciepNotSupported PcieConflictReason so non-RC enumerators fail loudly). No callers yet — that comes in a follow-up. This change is purely additive infrastructure. --- openvmm/openvmm_core/src/worker/dispatch.rs | 11 +- vm/devices/pci/pcie/fuzz/fuzz_pcie.rs | 15 +- vm/devices/pci/pcie/src/root.rs | 518 +++++++++++------- vmm_core/vmotherboard/src/base_chipset.rs | 18 + .../src/chipset/backing/arc_mutex/device.rs | 31 +- .../src/chipset/backing/arc_mutex/pci.rs | 52 ++ .../src/chipset/backing/arc_mutex/services.rs | 14 + .../vmotherboard/src/chipset/builder/mod.rs | 21 + vmm_core/vmotherboard/src/chipset/mod.rs | 8 + 9 files changed, 480 insertions(+), 208 deletions(-) diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index 500a5ff3e4..0518a7683a 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -1888,15 +1888,14 @@ impl InitializedVm { .map(build_root_port_definition) .collect(); - GenericPcieRootComplex::new( + GenericPcieRootComplex::builder( &mut services.register_mmio(), - rc.start_bus, - rc.end_bus, - chbcr_range, + rc.start_bus..=rc.end_bus, ranges.ecam_range, - root_port_definitions, - msi_conn.target(), ) + .root_ports(root_port_definitions, msi_conn.target()) + .chbcr_range(chbcr_range) + .build() })?; if let Some(signal_msi) = partition.as_signal_msi(Vtl::Vtl0) { diff --git a/vm/devices/pci/pcie/fuzz/fuzz_pcie.rs b/vm/devices/pci/pcie/fuzz/fuzz_pcie.rs index 12ec778669..6ee324340d 100644 --- a/vm/devices/pci/pcie/fuzz/fuzz_pcie.rs +++ b/vm/devices/pci/pcie/fuzz/fuzz_pcie.rs @@ -129,15 +129,10 @@ impl FuzzRootComplex { .collect(); let msi_conn = pci_core::msi::MsiConnection::new(pci_core::bus_range::AssignedBusRange::new(), 0); - let rc = GenericPcieRootComplex::new( - &mut register_mmio, - START_BUS, - END_BUS, - None, - ecam_range, - port_defs, - msi_conn.target(), - ); + let rc = + GenericPcieRootComplex::builder(&mut register_mmio, START_BUS..=END_BUS, ecam_range) + .root_ports(port_defs, msi_conn.target()) + .build(); Self { rc } } @@ -264,7 +259,7 @@ fn do_fuzz(u: &mut Unstructured<'_>) -> arbitrary::Result<()> { let mut rc = FuzzRootComplex::new(&topology, ecam_range); - // Port keys: port 0 = device_number 0, port 1 = device_number 8 (1 << 3) + // Port keys: device number 0, 1, etc. let port0_key: u8 = 0; match topology { diff --git a/vm/devices/pci/pcie/src/root.rs b/vm/devices/pci/pcie/src/root.rs index adb7a660cb..6b64fb3c54 100644 --- a/vm/devices/pci/pcie/src/root.rs +++ b/vm/devices/pci/pcie/src/root.rs @@ -32,11 +32,41 @@ use pci_core::spec::hwid::ClassCode; use pci_core::spec::hwid::HardwareIds; use pci_core::spec::hwid::ProgrammingInterface; use pci_core::spec::hwid::Subclass; -use std::collections::HashMap; +use std::ops::RangeInclusive; use std::sync::Arc; use vmcore::device_state::ChangeDeviceState; use zerocopy::IntoBytes; +/// Maximum number of PCI devices on a single bus. +const MAX_DEVICES_PER_BUS: usize = 32; + +/// A device occupying a slot on the root complex bus. +enum BusDevice { + /// No device at this slot. + Empty, + /// A root port providing downstream connectivity. + RootPort { name: Arc, port: Box }, + /// A Root Complex Integrated Endpoint (RCiEP). + Rciep { + name: Arc, + dev: Box, + }, +} + +impl Inspect for BusDevice { + fn inspect(&self, req: inspect::Request<'_>) { + match self { + BusDevice::Empty => {} + BusDevice::RootPort { port, .. } => { + req.respond().merge(port.as_ref()); + } + BusDevice::Rciep { name, .. } => { + req.respond().field("name", name); + } + } + } +} + /// A generic PCI Express root complex emulator. #[derive(InspectMut)] pub struct GenericPcieRootComplex { @@ -50,14 +80,15 @@ pub struct GenericPcieRootComplex { chbcr: Option>, /// CXL Component Registers backing CHBCR accesses in CXL mode. cxl_component_registers: Option, - /// Map of root ports attached to the root complex, indexed by combined device and function numbers. - #[inspect(with = "|x| inspect::iter_by_key(x).map_value(|(_, v)| v)")] - ports: HashMap, RootPort)>, + /// Devices on the root complex bus, indexed by PCI device number (0-31). + /// Each slot can hold a root port, an RCiEP, or be empty. + #[inspect(iter_by_index)] + devices: [BusDevice; MAX_DEVICES_PER_BUS], } /// Information about a downstream port in a PCIe topology. pub struct DownstreamPortInfo { - /// The port number (device/function index). + /// The PCI device number (0-31) of this port on the root complex bus. pub port_number: u8, /// The port name. pub name: Arc, @@ -76,65 +107,66 @@ pub struct GenericPcieRootPortDefinition { pub settings: PciePortSettings, } -/// A flat description of a PCIe switch without hierarchy. -pub struct GenericSwitchDefinition { - /// The name of the switch. - pub name: Arc, - /// Number of downstream ports. - pub num_downstream_ports: u8, - /// The parent port this switch is connected to. - pub parent_port: Arc, - /// Whether hotplug is enabled for this switch. - pub hotplug: bool, - /// Express-level settings for downstream switch ports. - pub dsp_settings: PciePortSettings, +/// Builder for [`GenericPcieRootComplex`]. +/// +/// Obtain via [`GenericPcieRootComplex::builder`], configure optional +/// settings, then call [`build`](GenericPcieRootComplexBuilder::build). +pub struct GenericPcieRootComplexBuilder<'a> { + register_mmio: &'a mut dyn RegisterMmioIntercept, + bus_range: RangeInclusive, + ecam_range: MemoryRange, + root_ports: Option<(Vec, &'a MsiTarget)>, + first_port_device_number: u8, + chbcr_range: Option, } -impl GenericSwitchDefinition { - /// Create a new switch definition. - pub fn new( - name: impl Into>, - num_downstream_ports: u8, - parent_port: impl Into>, - hotplug: bool, - dsp_settings: PciePortSettings, +impl<'a> GenericPcieRootComplexBuilder<'a> { + /// Add root ports to the complex. + /// + /// `msi_target` is the MSI target for all root ports; the caller is + /// responsible for creating the `MsiConnection` and connecting it to + /// the platform's interrupt controller. + pub fn root_ports( + mut self, + ports: Vec, + msi_target: &'a MsiTarget, ) -> Self { - Self { - name: name.into(), - num_downstream_ports, - parent_port: parent_port.into(), - hotplug, - dsp_settings, - } + self.root_ports = Some((ports, msi_target)); + self } -} -enum DecodedEcamAccess<'a> { - UnexpectedIntercept, - Unroutable, - InternalBus(&'a mut RootPort, u16), - DownstreamPort(&'a mut RootPort, u8, u8, u16), -} + /// Set the first PCI device number to assign to root ports (default 0). + /// + /// Root ports are placed at consecutive device numbers starting from + /// this value. Use a non-zero value to reserve lower device numbers + /// for RCiEPs (e.g., an IOMMU at device 0). + pub fn first_port_device_number(mut self, device: u8) -> Self { + self.first_port_device_number = device; + self + } -impl GenericPcieRootComplex { - /// Constructs a new `GenericPcieRootComplex` emulator. + /// Set the CHBCR (Component Register BAR) MMIO range for CXL mode. /// - /// `msi_target` is the MSI target for all root ports in this complex. - /// The caller is responsible for creating the `MsiConnection` and - /// connecting it to the platform's interrupt controller. - pub fn new( - register_mmio: &mut dyn RegisterMmioIntercept, - start_bus: u8, - end_bus: u8, - chbcr_range: Option, - ecam_range: MemoryRange, - ports: Vec, - msi_target: &MsiTarget, - ) -> Self { - assert_eq!( - ecam_size_from_bus_numbers(start_bus, end_bus), - ecam_range.len() - ); + /// When set, CXL component registers are allocated and a CHBCR MMIO + /// region is mapped. + pub fn chbcr_range(mut self, range: Option) -> Self { + self.chbcr_range = range; + self + } + + /// Build the root complex. + pub fn build(self) -> GenericPcieRootComplex { + let Self { + register_mmio, + bus_range, + ecam_range, + root_ports, + first_port_device_number, + chbcr_range, + } = self; + + let start_bus = *bus_range.start(); + let end_bus = *bus_range.end(); let mut ecam = register_mmio.new_io_region("ecam", ecam_range.len()); ecam.map(ecam_range.start()); @@ -157,19 +189,20 @@ impl GenericPcieRootComplex { region }); - let port_map: HashMap, RootPort)> = ports - .into_iter() - .enumerate() - .map(|(i, definition)| { - let device_number: u8 = (i << BDF_DEVICE_SHIFT).try_into().expect("too many ports"); - // Use the device number as the slot number for hotpluggable ports + let mut devices: [BusDevice; MAX_DEVICES_PER_BUS] = + std::array::from_fn(|_| BusDevice::Empty); + + if let Some((ports, msi_target)) = root_ports { + for (i, definition) in ports.into_iter().enumerate() { + let device = i + first_port_device_number as usize; + assert!(device < MAX_DEVICES_PER_BUS, "too many ports"); + let devfn = (device << BDF_DEVICE_SHIFT) as u8; let hotplug_slot_number = if definition.hotplug { - Some((device_number as u32) + 1) + Some(device as u32 + 1) } else { None }; - // Derive a per-port MSI target with this port's devfn. - let port_msi_target = msi_target.with_devfn(device_number); + let port_msi_target = msi_target.with_devfn(devfn); let root_port = RootPort::new( register_mmio, definition.name.clone(), @@ -177,17 +210,86 @@ impl GenericPcieRootComplex { &port_msi_target, definition.settings, ); - (device_number, (definition.name, root_port)) - }) - .collect(); + devices[device] = BusDevice::RootPort { + name: definition.name, + port: Box::new(root_port), + }; + } + } - Self { + GenericPcieRootComplex { start_bus, end_bus, ecam, chbcr, cxl_component_registers, - ports: port_map, + devices, + } + } +} + +/// A flat description of a PCIe switch without hierarchy. +pub struct GenericSwitchDefinition { + /// The name of the switch. + pub name: Arc, + /// Number of downstream ports. + pub num_downstream_ports: u8, + /// The parent port this switch is connected to. + pub parent_port: Arc, + /// Whether hotplug is enabled for this switch. + pub hotplug: bool, + /// Express-level settings for downstream switch ports. + pub dsp_settings: PciePortSettings, +} + +impl GenericSwitchDefinition { + /// Create a new switch definition. + pub fn new( + name: impl Into>, + num_downstream_ports: u8, + parent_port: impl Into>, + hotplug: bool, + dsp_settings: PciePortSettings, + ) -> Self { + Self { + name: name.into(), + num_downstream_ports, + parent_port: parent_port.into(), + hotplug, + dsp_settings, + } + } +} + +enum DecodedEcamAccess<'a> { + UnexpectedIntercept, + Unroutable, + InternalBus(&'a mut RootPort, u16), + DownstreamPort(&'a mut RootPort, u8, u8, u16), + /// A Root Complex Integrated Endpoint (RCiEP) on the start bus. + Rciep(&'a mut dyn GenericPciBusDevice, u16), +} + +impl GenericPcieRootComplex { + /// Returns a builder for constructing a new `GenericPcieRootComplex`. + pub fn builder<'a>( + register_mmio: &'a mut dyn RegisterMmioIntercept, + bus_range: RangeInclusive, + ecam_range: MemoryRange, + ) -> GenericPcieRootComplexBuilder<'a> { + assert_eq!( + ecam_size_from_bus_numbers(*bus_range.start(), *bus_range.end()), + ecam_range.len(), + "ECAM range size does not match bus range" + ); + + GenericPcieRootComplexBuilder { + register_mmio, + bus_range, + ecam_range, + root_ports: None, + first_port_device_number: 0, + chbcr_range: None, } } @@ -225,43 +327,35 @@ impl GenericPcieRootComplex { } } - /// Attach the provided `GenericPciBusDevice` to the port identified. + /// Attach the provided `GenericPciBusDevice` to the port identified by + /// PCI device number (0-31). pub fn add_pcie_device( &mut self, port: u8, name: impl AsRef, dev: Box, ) -> Result<(), Arc> { - let (_port_name, root_port) = self.ports.get_mut(&port).ok_or_else(|| -> Arc { - tracing::error!( - "GenericPcieRootComplex: port {:#x} not found for device '{}'", - port, - name.as_ref() - ); - format!("Port {:#x} not found", port).into() - })?; + let root_port = match &mut self.devices[port as usize] { + BusDevice::RootPort { port, .. } => port, + BusDevice::Rciep { name: existing, .. } => return Err(existing.clone()), + BusDevice::Empty => return Err(format!("device {port} is not a root port").into()), + }; - match root_port.connect_device(name, dev) { - Ok(()) => Ok(()), - Err(existing_device) => { - tracing::warn!( - "GenericPcieRootComplex: failed to connect device to port {:#x}, existing device: '{}'", - port, - existing_device - ); - Err(existing_device) - } - } + root_port.connect_device(name, dev) } /// Enumerate the downstream ports of the root complex. pub fn downstream_ports(&self) -> Vec { - self.ports + self.devices .iter() - .map(|(port, (name, rp))| DownstreamPortInfo { - port_number: *port, - name: name.clone(), - bus_range: rp.port.bus_range(), + .enumerate() + .filter_map(|(i, d)| match d { + BusDevice::RootPort { name, port } => Some(DownstreamPortInfo { + port_number: i as u8, + name: name.clone(), + bus_range: port.port.bus_range(), + }), + BusDevice::Rciep { .. } | BusDevice::Empty => None, }) .collect() } @@ -273,27 +367,52 @@ impl GenericPcieRootComplex { device_name: &str, device: Box, ) -> anyhow::Result<()> { - let (_, (_, root_port)) = self - .ports + let root_port = self + .devices .iter_mut() - .find(|(_, (name, _))| name.as_ref() == port_name) + .find_map(|d| match d { + BusDevice::RootPort { name, port } if name.as_ref() == port_name => Some(port), + BusDevice::RootPort { .. } | BusDevice::Rciep { .. } | BusDevice::Empty => None, + }) .ok_or_else(|| anyhow::anyhow!("port '{}' not found", port_name))?; root_port.port.hotplug_add_device(device_name, device) } /// Hot-remove the device from a named port. pub fn hotplug_remove_device(&mut self, port_name: &str) -> anyhow::Result<()> { - let (_, (_, root_port)) = self - .ports + let root_port = self + .devices .iter_mut() - .find(|(_, (name, _))| name.as_ref() == port_name) + .find_map(|d| match d { + BusDevice::RootPort { name, port } if name.as_ref() == port_name => Some(port), + BusDevice::RootPort { .. } | BusDevice::Rciep { .. } | BusDevice::Empty => None, + }) .ok_or_else(|| anyhow::anyhow!("port '{}' not found", port_name))?; root_port.port.hotplug_remove_device() } - /// Returns the size of the ECAM MMIO region this root complex is emulating. - pub fn ecam_size(&self) -> u64 { - ecam_size_from_bus_numbers(self.start_bus, self.end_bus) + /// Attach a Root Complex Integrated Endpoint (RCiEP) at the given + /// PCI device number (0-31) on the start bus of this root complex. + /// + /// RCiEPs are Type 0 PCI functions that appear directly on the start + /// bus alongside root ports (e.g., an AMD IOMMU at device 0). + /// They do not sit behind a downstream port and have a fixed BDF. + pub fn add_rciep( + &mut self, + device: u8, + name: impl Into>, + dev: Box, + ) -> Result<(), Arc> { + let name = name.into(); + match &self.devices[device as usize] { + BusDevice::Empty => {} + BusDevice::RootPort { name: existing, .. } + | BusDevice::Rciep { name: existing, .. } => { + return Err(existing.clone()); + } + } + self.devices[device as usize] = BusDevice::Rciep { name, dev }; + Ok(()) } fn decode_ecam_access<'a>(&'a mut self, addr: u64) -> DecodedEcamAccess<'a> { @@ -310,26 +429,35 @@ impl GenericPcieRootComplex { let cfg_offset_within_function = (ecam_offset & PAGE_OFFSET_MASK) as u16; if bus_number == self.start_bus { - match self.ports.get_mut(&device_function) { - Some((_, port)) => { + let device = (device_function >> BDF_DEVICE_SHIFT) as usize; + let function = device_function & 0x7; + match &mut self.devices[device] { + BusDevice::RootPort { port, .. } if function == 0 => { return DecodedEcamAccess::InternalBus(port, cfg_offset_within_function); } - None => return DecodedEcamAccess::Unroutable, + BusDevice::Rciep { dev, .. } if function == 0 => { + return DecodedEcamAccess::Rciep(dev.as_mut(), cfg_offset_within_function); + } + BusDevice::RootPort { .. } | BusDevice::Rciep { .. } | BusDevice::Empty => { + return DecodedEcamAccess::Unroutable; + } } } else if bus_number > self.start_bus && bus_number <= self.end_bus { - for (_, port) in self.ports.values_mut() { - if port - .port - .cfg_space - .assigned_bus_range() - .contains(&bus_number) - { - return DecodedEcamAccess::DownstreamPort( - port, - bus_number, - device_function, - cfg_offset_within_function, - ); + for d in self.devices.iter_mut() { + if let BusDevice::RootPort { port, .. } = d { + if port + .port + .cfg_space + .assigned_bus_range() + .contains(&bus_number) + { + return DecodedEcamAccess::DownstreamPort( + port, + bus_number, + device_function, + cfg_offset_within_function, + ); + } } } return DecodedEcamAccess::Unroutable; @@ -349,14 +477,16 @@ impl GenericPcieRootComplex { } } - for (_, port) in self.ports.values_mut() { - if let Some((bar, offset)) = port.port.find_bar(addr) { - if let Err(err) = - validate_aligned_access(addr, data.len(), &BAR_ALLOWED_ACCESS_SIZES) - { - return Some(IoResult::Err(err)); + for d in self.devices.iter_mut() { + if let BusDevice::RootPort { port, .. } = d { + if let Some((bar, offset)) = port.port.find_bar(addr) { + if let Err(err) = + validate_aligned_access(addr, data.len(), &BAR_ALLOWED_ACCESS_SIZES) + { + return Some(IoResult::Err(err)); + } + return Some(port.port.bar_mmio_read(bar, offset, data)); } - return Some(port.port.bar_mmio_read(bar, offset, data)); } } @@ -373,14 +503,16 @@ impl GenericPcieRootComplex { } } - for (_, port) in self.ports.values_mut() { - if let Some((bar, offset)) = port.port.find_bar(addr) { - if let Err(err) = - validate_aligned_access(addr, data.len(), &BAR_ALLOWED_ACCESS_SIZES) - { - return Some(IoResult::Err(err)); + for d in self.devices.iter_mut() { + if let BusDevice::RootPort { port, .. } = d { + if let Some((bar, offset)) = port.port.find_bar(addr) { + if let Err(err) = + validate_aligned_access(addr, data.len(), &BAR_ALLOWED_ACCESS_SIZES) + { + return Some(IoResult::Err(err)); + } + return Some(port.port.bar_mmio_write(bar, offset, data)); } - return Some(port.port.bar_mmio_write(bar, offset, data)); } } @@ -400,8 +532,10 @@ impl ChangeDeviceState for GenericPcieRootComplex { async fn stop(&mut self) {} async fn reset(&mut self) { - for (_, (_, port)) in self.ports.iter_mut() { - port.port.cfg_space.reset(); + for d in self.devices.iter_mut() { + if let BusDevice::RootPort { port, .. } = d { + port.port.cfg_space.reset(); + } } } } @@ -470,6 +604,11 @@ impl MmioIntercept for GenericPcieRootComplex { DecodedEcamAccess::InternalBus(port, cfg_offset) => { check_result!(port.port.cfg_space.read_u32(cfg_offset, &mut dword_value)); } + DecodedEcamAccess::Rciep(dev, cfg_offset) => { + if let Some(result) = dev.pci_cfg_read(cfg_offset, &mut dword_value) { + check_result!(result); + } + } DecodedEcamAccess::DownstreamPort(port, bus_number, function, cfg_offset) => { check_result!(port.forward_cfg_read( &bus_number, @@ -536,6 +675,11 @@ impl MmioIntercept for GenericPcieRootComplex { DecodedEcamAccess::InternalBus(port, cfg_offset) => { check_result!(port.port.cfg_space.write_u32(cfg_offset, write_dword)); } + DecodedEcamAccess::Rciep(dev, cfg_offset) => { + if let Some(result) = dev.pci_cfg_write(cfg_offset, write_dword) { + check_result!(result); + } + } DecodedEcamAccess::DownstreamPort(port, bus_number, function, cfg_offset) => { check_result!(port.forward_cfg_write( &bus_number, @@ -681,7 +825,7 @@ mod save_restore { #[derive(Protobuf)] #[mesh(package = "pcie.root")] pub struct PortSavedState { - /// The port number (device_function index in the ports HashMap). + /// The PCI device number (0-31) of this port. #[mesh(1)] pub port_number: u8, /// The root port Type 1 configuration space state. @@ -715,16 +859,17 @@ mod save_restore { type SavedState = state::SavedState; fn save(&mut self) -> Result { - // Save all root ports and sort by port number for stable ordering. - let mut ports = Vec::with_capacity(self.ports.len()); - for (&port_number, (_, root_port)) in self.ports.iter_mut() { - ports.push(state::PortSavedState { - port_number, - cfg_space: root_port.port.cfg_space.save()?, - cxl_component_registers: root_port.port.save_cxl_component_registers_state()?, - }); + // Save all root ports in device-number order (already ordered by array index). + let mut ports = Vec::new(); + for (i, d) in self.devices.iter_mut().enumerate() { + if let BusDevice::RootPort { port, .. } = d { + ports.push(state::PortSavedState { + port_number: i as u8, + cfg_space: port.port.cfg_space.save()?, + cxl_component_registers: port.port.save_cxl_component_registers_state()?, + }); + } } - ports.sort_by_key(|p| p.port_number); Ok(state::SavedState { start_bus: self.start_bus, @@ -758,11 +903,16 @@ mod save_restore { } // Validate port count matches - if ports.len() != self.ports.len() { + let root_port_count = self + .devices + .iter() + .filter(|d| matches!(d, BusDevice::RootPort { .. })) + .count(); + if ports.len() != root_port_count { return Err(RestoreError::InvalidSavedState(anyhow::anyhow!( "root port count mismatch: saved {}, current {}", ports.len(), - self.ports.len() + root_port_count ))); } @@ -777,16 +927,19 @@ mod save_restore { ))); } - if let Some((_, root_port)) = self.ports.get_mut(&port_state.port_number) { - root_port.port.cfg_space.restore(port_state.cfg_space)?; - root_port.port.restore_cxl_component_registers_state( - port_state.cxl_component_registers, - )?; - } else { - return Err(RestoreError::InvalidSavedState(anyhow::anyhow!( - "root port {} not found", - port_state.port_number - ))); + match &mut self.devices[port_state.port_number as usize] { + BusDevice::RootPort { port, .. } => { + port.port.cfg_space.restore(port_state.cfg_space)?; + port.port.restore_cxl_component_registers_state( + port_state.cxl_component_registers, + )?; + } + BusDevice::Rciep { .. } | BusDevice::Empty => { + return Err(RestoreError::InvalidSavedState(anyhow::anyhow!( + "root port {} not found", + port_state.port_number + ))); + } } } @@ -836,15 +989,9 @@ mod tests { let rc_bus_range = AssignedBusRange::new(); rc_bus_range.set_bus_range(start_bus, end_bus); let msi_conn = pci_core::msi::MsiConnection::new(rc_bus_range, 0); - GenericPcieRootComplex::new( - &mut register_mmio, - start_bus, - end_bus, - None, - ecam, - port_defs, - msi_conn.target(), - ) + GenericPcieRootComplex::builder(&mut register_mmio, start_bus..=end_bus, ecam) + .root_ports(port_defs, msi_conn.target()) + .build() } fn instantiate_root_complex_with_chbcr( @@ -870,15 +1017,10 @@ mod tests { rc_bus_range.set_bus_range(start_bus, end_bus); let msi_conn = pci_core::msi::MsiConnection::new(rc_bus_range, 0); - GenericPcieRootComplex::new( - &mut register_mmio, - start_bus, - end_bus, - Some(chbcr), - ecam, - port_defs, - msi_conn.target(), - ) + GenericPcieRootComplex::builder(&mut register_mmio, start_bus..=end_bus, ecam) + .root_ports(port_defs, msi_conn.target()) + .chbcr_range(Some(chbcr)) + .build() } #[test] @@ -930,17 +1072,17 @@ mod tests { #[test] fn test_ecam_size() { // Single bus - assert_eq!(instantiate_root_complex(0, 0, 0).ecam_size(), 0x10_0000); - assert_eq!(instantiate_root_complex(32, 32, 0).ecam_size(), 0x10_0000); - assert_eq!(instantiate_root_complex(255, 255, 0).ecam_size(), 0x10_0000); + assert_eq!(ecam_size_from_bus_numbers(0, 0), 0x10_0000); + assert_eq!(ecam_size_from_bus_numbers(32, 32), 0x10_0000); + assert_eq!(ecam_size_from_bus_numbers(255, 255), 0x10_0000); // Two bus - assert_eq!(instantiate_root_complex(0, 1, 0).ecam_size(), 0x20_0000); - assert_eq!(instantiate_root_complex(32, 33, 0).ecam_size(), 0x20_0000); - assert_eq!(instantiate_root_complex(254, 255, 0).ecam_size(), 0x20_0000); + assert_eq!(ecam_size_from_bus_numbers(0, 1), 0x20_0000); + assert_eq!(ecam_size_from_bus_numbers(32, 33), 0x20_0000); + assert_eq!(ecam_size_from_bus_numbers(254, 255), 0x20_0000); // Everything - assert_eq!(instantiate_root_complex(0, 255, 0).ecam_size(), 0x1000_0000); + assert_eq!(ecam_size_from_bus_numbers(0, 255), 0x1000_0000); } #[test] @@ -1000,12 +1142,8 @@ mod tests { rc.add_pcie_device(0, "ep1", Box::new(endpoint1)).unwrap(); - match rc.add_pcie_device(0, "ep2", Box::new(endpoint2)) { - Ok(()) => panic!("should have failed"), - Err(name) => { - assert_eq!(name, "ep1".into()); - } - } + rc.add_pcie_device(0, "ep2", Box::new(endpoint2)) + .expect_err("should fail: port already occupied"); } #[test] diff --git a/vmm_core/vmotherboard/src/base_chipset.rs b/vmm_core/vmotherboard/src/base_chipset.rs index 43a5facc60..a48b96c145 100644 --- a/vmm_core/vmotherboard/src/base_chipset.rs +++ b/vmm_core/vmotherboard/src/base_chipset.rs @@ -838,6 +838,24 @@ mod weak_mutex_pci { fn downstream_ports(&self) -> Vec { self.lock().downstream_ports() } + + fn add_rciep( + &mut self, + device: u8, + name: Arc, + dev: Weak>, + ) -> Result<(), PcieConflict> { + self.lock() + .add_rciep( + device, + name.clone(), + Box::new(WeakMutexPciDeviceWrapper(dev)), + ) + .map_err(|existing_dev_name| PcieConflict { + reason: PcieConflictReason::ExistingDev(existing_dev_name), + conflict_dev: name, + }) + } } // wiring to enable using the PCIe switch alongside the Arc+CloseableMutex device infra diff --git a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs index 52b1dae6d1..3ea4bcb1cd 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/device.rs @@ -7,6 +7,7 @@ use super::services::ArcMutexChipsetServices; use crate::BusIdPci; use crate::BusIdPcieDownstreamPort; +use crate::BusIdPcieEnumerator; use crate::VmmChipsetDevice; use arc_cyclic_builder::ArcCyclicBuilder; use arc_cyclic_builder::ArcCyclicBuilderExt; @@ -75,6 +76,7 @@ pub struct ArcMutexChipsetDeviceBuilder<'a, 'b, T> { pci_addr: Option<(u8, u8, u8)>, pci_bus_id: Option, pcie_port: Option, + pcie_rciep: Option<(BusIdPcieEnumerator, u8)>, external_pci: bool, } @@ -102,6 +104,7 @@ where pci_addr: None, pci_bus_id: None, pcie_port: None, + pcie_rciep: None, external_pci: false, } } @@ -131,6 +134,22 @@ where self } + /// For PCIe devices: place the device as a Root Complex Integrated + /// Endpoint (RCiEP) on the start bus of the specified root complex. + /// + /// RCiEPs are Type 0 PCI functions that sit directly on the start bus + /// alongside root ports, without a downstream port above them (e.g., an + /// AMD IOMMU). The device occupies function 0 of the given PCI device + /// number (0-31). + pub fn on_pcie_root_complex( + mut self, + enumerator_id: BusIdPcieEnumerator, + device: u8, + ) -> Self { + self.pcie_rciep = Some((enumerator_id, device)); + self + } + /// For PCI devices: do not register the device with any PCI bus. This is /// used when the device is hooked up to a bus (such as a VPCI bus) outside /// of the vmotherboard infrastructure. @@ -167,15 +186,23 @@ where if !self.external_pci { if let Some(dev) = typed_dev.supports_pci() { - if self.pci_bus_id.is_some() && self.pcie_port.is_some() { + let bus_options = [ + self.pci_bus_id.is_some(), + self.pcie_port.is_some(), + self.pcie_rciep.is_some(), + ]; + if bus_options.iter().filter(|&&v| v).count() > 1 { panic!( - "wiring error: invoked both `on_pci_bus` and `on_pcie_port` for `{}`", + "wiring error: invoked multiple bus placement methods for `{}`", self.dev_name ); } if let Some(bus_id_port) = self.pcie_port { self.services.register_static_pcie(bus_id_port); + } else if let Some((enumerator_id, device)) = self.pcie_rciep { + self.services + .register_static_pcie_rciep(enumerator_id, device); } else { // static pci registration let bdf = match (self.pci_addr, dev.suggested_bdf()) { diff --git a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs index a87bd7e97c..0f409c9b67 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs @@ -88,6 +88,23 @@ pub trait RegisterWeakMutexPcie: Send { /// Enumerate the downstream ports. fn downstream_ports(&self) -> Vec; + + /// Try to add a Root Complex Integrated Endpoint (RCiEP) at the given + /// device/function on the start bus of the root complex. + /// + /// Not all enumerators support RCiEPs — only root complexes do. + /// The default implementation returns an error. + fn add_rciep( + &mut self, + _device: u8, + name: Arc, + _device_handle: Weak>, + ) -> Result<(), PcieConflict> { + Err(PcieConflict { + conflict_dev: name, + reason: PcieConflictReason::RciepNotSupported, + }) + } } pub struct WeakMutexPcieDeviceEntry { @@ -96,11 +113,19 @@ pub struct WeakMutexPcieDeviceEntry { pub dev: Weak>, } +pub struct WeakMutexPcieRciepEntry { + pub bus_id_enumerator: BusIdPcieEnumerator, + pub device: u8, + pub name: Arc, + pub dev: Weak>, +} + #[derive(Default)] pub struct BusResolverWeakMutexPcie { pub enumerators: HashMap>, pub ports: HashMap, pub devices: Vec, + pub rcieps: Vec, } impl BusResolverWeakMutexPcie { @@ -144,6 +169,33 @@ impl BusResolverWeakMutexPcie { }; } + for WeakMutexPcieRciepEntry { + bus_id_enumerator, + device, + name, + dev, + } in self.rcieps + { + let enumerator = match self.enumerators.get_mut(&bus_id_enumerator) { + Some(enumerator) => enumerator, + None => { + errs.push(PcieConflict { + conflict_dev: name.clone(), + reason: PcieConflictReason::MissingEnumerator, + }); + continue; + } + }; + + match enumerator.add_rciep(device, name, dev) { + Ok(()) => {} + Err(conflict) => { + errs.push(conflict); + continue; + } + }; + } + if !errs.is_empty() { Err(errs) } else { Ok(()) } } } diff --git a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs index fc5c4bcf88..26d03626e4 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/services.rs @@ -8,6 +8,7 @@ use super::device::ArcMutexChipsetServicesFinalize; use super::state_unit::ArcMutexChipsetDeviceUnit; use crate::BusIdPci; use crate::BusIdPcieDownstreamPort; +use crate::BusIdPcieEnumerator; use crate::ChipsetBuilder; use crate::VmmChipsetDevice; use crate::chipset::io_ranges::IoRanges; @@ -203,6 +204,19 @@ impl<'a, 'b> ArcMutexChipsetServices<'a, 'b> { ); } + pub fn register_static_pcie_rciep( + &mut self, + enumerator_id: BusIdPcieEnumerator, + device: u8, + ) { + self.builder.register_weak_mutex_pcie_rciep( + enumerator_id, + device, + self.dev_name.clone(), + self.dev.clone(), + ); + } + pub fn new_line(&mut self, id: LineSetId, name: &str, vector: u32) -> LineInterrupt { let mut inner = self.builder.inner.lock(); let (line_set, _) = diff --git a/vmm_core/vmotherboard/src/chipset/builder/mod.rs b/vmm_core/vmotherboard/src/chipset/builder/mod.rs index 58a136d80e..6faf4e1c7f 100644 --- a/vmm_core/vmotherboard/src/chipset/builder/mod.rs +++ b/vmm_core/vmotherboard/src/chipset/builder/mod.rs @@ -15,6 +15,7 @@ use super::backing::arc_mutex::pci::RegisterWeakMutexPci; use super::backing::arc_mutex::pci::RegisterWeakMutexPcie; use super::backing::arc_mutex::pci::WeakMutexPciEntry; use super::backing::arc_mutex::pci::WeakMutexPcieDeviceEntry; +use super::backing::arc_mutex::pci::WeakMutexPcieRciepEntry; use super::backing::arc_mutex::services::ArcMutexChipsetServices; use super::backing::arc_mutex::state_unit::ArcMutexChipsetDeviceUnit; use crate::BusId; @@ -255,6 +256,26 @@ impl<'a> ChipsetBuilder<'a> { }); } + pub(crate) fn register_weak_mutex_pcie_rciep( + &self, + bus_id_enumerator: BusIdPcieEnumerator, + device: u8, + name: Arc, + dev: Weak>, + ) { + self.inner + .lock() + .bus_resolver + .pcie + .rcieps + .push(WeakMutexPcieRciepEntry { + bus_id_enumerator, + device, + name, + dev, + }); + } + /// Add a new [`ChipsetDevice`](chipset_device::ChipsetDevice) to the /// chipset. **`dev_name` must be unique!** pub fn arc_mutex_device<'b, T: VmmChipsetDevice>( diff --git a/vmm_core/vmotherboard/src/chipset/mod.rs b/vmm_core/vmotherboard/src/chipset/mod.rs index ae723ba0a7..7f665d8001 100644 --- a/vmm_core/vmotherboard/src/chipset/mod.rs +++ b/vmm_core/vmotherboard/src/chipset/mod.rs @@ -324,6 +324,7 @@ pub enum PcieConflictReason { ExistingDev(Arc), MissingDownstreamPort, MissingEnumerator, + RciepNotSupported, } #[derive(Debug)] @@ -356,6 +357,13 @@ impl std::fmt::Display for PcieConflict { self.conflict_dev ) } + PcieConflictReason::RciepNotSupported => { + write!( + fmt, + "cannot attach {} as RCiEP, enumerator does not support RCiEPs", + self.conflict_dev + ) + } } } } From 1c18cb60ae5d8ee53f10fa2011ef04854585e8df Mon Sep 17 00:00:00 2001 From: John Starks Date: Thu, 21 May 2026 18:20:43 +0000 Subject: [PATCH 2/2] feedback --- vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs index 0f409c9b67..eaa051232b 100644 --- a/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs +++ b/vmm_core/vmotherboard/src/chipset/backing/arc_mutex/pci.rs @@ -90,7 +90,7 @@ pub trait RegisterWeakMutexPcie: Send { fn downstream_ports(&self) -> Vec; /// Try to add a Root Complex Integrated Endpoint (RCiEP) at the given - /// device/function on the start bus of the root complex. + /// PCI device number (0-31) on the start bus of the root complex. /// /// Not all enumerators support RCiEPs — only root complexes do. /// The default implementation returns an error.