From 3e6f44e53447aafe520eb593e906aa20d3d5b16f Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Fri, 24 Apr 2026 15:49:37 +0200 Subject: [PATCH] XXX Backport of https://github.com/cloud-hypervisor/cloud-hypervisor/pull/8031 Codex-assisted backport only to run that in our libvirt-tests pipeline. --- virtio-devices/src/device.rs | 4 +- vmm/src/acpi.rs | 4 +- vmm/src/device_manager.rs | 652 ++++++++++++++++++----------------- vmm/src/pci_segment.rs | 1 + vmm/src/vm.rs | 3 +- 5 files changed, 352 insertions(+), 312 deletions(-) diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs index 8289cdf116..82dc39ef70 100644 --- a/virtio-devices/src/device.rs +++ b/virtio-devices/src/device.rs @@ -326,7 +326,9 @@ impl Pausable for VirtioCommon { "Pausing virtio-{}", VirtioDeviceType::from(self.device_type) ); - self.paused.store(true, Ordering::SeqCst); + if self.paused.swap(true, Ordering::SeqCst) { + return Ok(()); + } if let Some(pause_evt) = &self.pause_evt { pause_evt .write(1) diff --git a/vmm/src/acpi.rs b/vmm/src/acpi.rs index 8f46b20ddb..2ab61b5e6a 100644 --- a/vmm/src/acpi.rs +++ b/vmm/src/acpi.rs @@ -1014,7 +1014,7 @@ fn create_acpi_tables_internal( // VIOT if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices() { - let viot = create_viot_table(iommu_bdf, devices_bdf); + let viot = create_viot_table(&iommu_bdf, &devices_bdf); let viot_addr = prev_tbl_addr.checked_add(prev_tbl_len).unwrap(); tables_bytes.extend_from_slice(viot.as_slice()); @@ -1183,7 +1183,7 @@ pub fn create_acpi_tables_tdx( // VIOT if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices() { - tables.push(create_viot_table(iommu_bdf, devices_bdf)); + tables.push(create_viot_table(&iommu_bdf, &devices_bdf)); } tables diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 1e6293c350..6371dc57d0 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -881,6 +881,33 @@ struct DeviceManagerState { device_id_cnt: Wrapping, } +struct PciHotplugSharedState { + pci_segments: Vec, + selected_segment: usize, + // Let the bus keep weak refs while this shared state owns the strong refs. + bus_devices: Vec>, + vfio_container: Option>, + iommu_attached_devices: Option<(PciBdf, Vec)>, + virtio_mem_devices: Vec>>, +} + +impl PciHotplugSharedState { + fn push_bus_device(&mut self, bus_device: Arc) { + self.bus_devices.push(bus_device); + } + + fn remove_bus_device(&mut self, bus_device: &Arc) { + self.bus_devices.retain(|dev| !Arc::ptr_eq(dev, bus_device)); + } + + fn cleanup_vfio_container(&mut self) { + if let Some(1) = self.vfio_container.as_ref().map(Arc::strong_count) { + debug!("Drop 'vfio container' given no active 'vfio devices'."); + self.vfio_container = None; + } + } +} + #[derive(Debug)] pub struct PtyPair { pub main: File, @@ -998,6 +1025,10 @@ pub struct DeviceManager { /// a weak reference). _acpi_cpu_hotplug_controller: Arc>, + /// Owned version needed to keep the bus device alive (the bus only holds + /// a weak reference). + _acpi_pci_hotplug_controller: Arc>, + // The virtio devices on the system virtio_devices: VecDeque, @@ -1005,11 +1036,6 @@ pub struct DeviceManager { block_devices: Vec>>, // List of bus devices - // Let the DeviceManager keep strong references to the BusDevice devices. - // This allows the IO and MMIO buses to be provided with Weak references, - // which prevents cyclic dependencies. - bus_devices: Vec>, - // Counter to keep track of the consumed device IDs. device_id_cnt: Wrapping, @@ -1026,21 +1052,10 @@ pub struct DeviceManager { // Passthrough device handle passthrough_device: Option, - // VFIO container - // Only one container can be created, therefore it is stored as part of the - // DeviceManager to be reused. - vfio_container: Option>, - // Paravirtualized IOMMU iommu_device: Option>>, iommu_mapping: Option>, - // PCI information about devices attached to the paravirtualized IOMMU - // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF - // representing the devices attached to the virtual IOMMU. This is useful - // information for filling the ACPI VIOT table. - iommu_attached_devices: Option<(PciBdf, Vec)>, - // Tree of devices, representing the dependencies between devices. // Useful for introspection, snapshot and restore. device_tree: Arc>, @@ -1071,11 +1086,7 @@ pub struct DeviceManager { activate_evt: EventFd, acpi_address: GuestAddress, - - selected_segment: usize, - - // Possible handle to the virtio-mem device - virtio_mem_devices: Vec>>, + pci_hotplug_state: Arc>, #[cfg(target_arch = "aarch64")] // GPIO device for AArch64 @@ -1126,6 +1137,227 @@ pub struct DeviceManager { ivshmem_device: Option>>, } +/// MMIO-accessible controller for ACPI PCI hotplug register accesses. +pub struct AcpiPciHotplugController { + shared_state: Arc>, + address_manager: Arc, + memory_manager: Arc>, + device_tree: Arc>, + mmio_regions: Arc>>, +} + +impl AcpiPciHotplugController { + fn new( + shared_state: Arc>, + address_manager: Arc, + memory_manager: Arc>, + device_tree: Arc>, + mmio_regions: Arc>>, + ) -> Self { + Self { + shared_state, + address_manager, + memory_manager, + device_tree, + mmio_regions, + } + } + + fn eject_device( + &mut self, + pci_segment_id: u16, + device_id: u8, + ) -> DeviceManagerResult>>> { + info!("Ejecting device_id = {device_id} on segment_id={pci_segment_id}"); + + let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); + + let (pci_bus, mem32_allocator, mem64_allocator, virtio_mem_devices, iommu_attached) = { + let shared_state = self.shared_state.lock().unwrap(); + let pci_segment = &shared_state.pci_segments[pci_segment_id as usize]; + + ( + Arc::clone(&pci_segment.pci_bus), + Arc::clone(&pci_segment.mem32_allocator), + Arc::clone(&pci_segment.mem64_allocator), + shared_state.virtio_mem_devices.clone(), + shared_state + .iommu_attached_devices + .as_ref() + .is_some_and(|(_, devices)| devices.contains(&pci_device_bdf)), + ) + }; + + pci_bus + .lock() + .unwrap() + .put_device_id(device_id as usize) + .map_err(DeviceManagerError::PutPciDeviceId)?; + + let (pci_device_handle, id) = { + let mut device_tree = self.device_tree.lock().unwrap(); + let pci_device_node = device_tree + .remove_node_by_pci_bdf(pci_device_bdf) + .ok_or(DeviceManagerError::MissingPciDevice)?; + + let mut id = pci_device_node.id; + let pci_device_handle = pci_device_node + .pci_device_handle + .ok_or(DeviceManagerError::MissingPciDevice)?; + if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) + && !pci_device_node.children.is_empty() + { + assert_eq!(pci_device_node.children.len(), 1); + id.clone_from(&pci_device_node.children[0]); + } + for child in &pci_device_node.children { + device_tree.remove(child); + } + + (pci_device_handle, id) + }; + + let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { + PciDeviceHandle::Vfio(vfio_pci_device) => { + let device_regions = vfio_pci_device.lock().unwrap().mmio_regions().clone(); + let mut mmio_regions = self.mmio_regions.lock().unwrap(); + for device_region in &device_regions { + mmio_regions.retain(|x| !x.has_matching_slots(device_region)); + } + + ( + Arc::clone(&vfio_pci_device) as Arc>, + Arc::clone(&vfio_pci_device) as Arc, + None as Option>>, + false, + ) + } + PciDeviceHandle::Virtio(virtio_pci_device) => { + let dev = virtio_pci_device.lock().unwrap(); + let bar_addr = dev.config_bar_addr(); + for (event, addr) in dev.ioeventfds(bar_addr) { + let io_addr = IoEventAddress::Mmio(addr); + self.address_manager + .vm + .unregister_ioevent(event, &io_addr) + .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; + } + + if let Some(dma_handler) = dev.dma_handler() + && !iommu_attached + { + for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { + for region in zone.regions() { + dma_handler + .unmap(region.start_addr().0, region.len()) + .map_err(DeviceManagerError::VirtioDmaUnmap)?; + } + } + } + + ( + Arc::clone(&virtio_pci_device) as Arc>, + Arc::clone(&virtio_pci_device) as Arc, + Some(dev.virtio_device()), + dev.dma_handler().is_some() && !iommu_attached, + ) + } + PciDeviceHandle::VfioUser(vfio_user_pci_device) => { + let mut dev = vfio_user_pci_device.lock().unwrap(); + for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { + for region in zone.regions() { + dev.dma_unmap(region) + .map_err(DeviceManagerError::VfioUserDmaUnmap)?; + } + } + + ( + Arc::clone(&vfio_user_pci_device) as Arc>, + Arc::clone(&vfio_user_pci_device) as Arc, + None as Option>>, + true, + ) + } + }; + + if remove_dma_handler { + for virtio_mem_device in &virtio_mem_devices { + let source = VirtioMemMappingSource::Device(pci_device_bdf.into()); + virtio_mem_device + .lock() + .unwrap() + .remove_dma_mapping_handler(&source) + .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; + } + } + + pci_device + .lock() + .unwrap() + .free_bars( + &mut self.address_manager.allocator.lock().unwrap(), + &mut mem32_allocator.lock().unwrap(), + &mut mem64_allocator.lock().unwrap(), + ) + .map_err(DeviceManagerError::FreePciBars)?; + + pci_bus + .lock() + .unwrap() + .remove_by_device(&pci_device) + .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; + + #[cfg(target_arch = "x86_64")] + self.address_manager + .io_bus + .remove_by_device(bus_device.as_ref()) + .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; + + self.address_manager + .mmio_bus + .remove_by_device(bus_device.as_ref()) + .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; + + { + let mut shared_state = self.shared_state.lock().unwrap(); + shared_state.remove_bus_device(&bus_device); + shared_state.cleanup_vfio_container(); + } + + if let Some(virtio_device) = &virtio_device { + for mapping in virtio_device.lock().unwrap().userspace_mappings() { + // SAFETY: userspace_mappings only contains mappings created by the VMM. + unsafe { + self.memory_manager + .lock() + .unwrap() + .remove_userspace_mapping( + mapping.addr.raw_value(), + mapping.mapping.size(), + mapping.mapping.as_ptr() as _, + mapping.mergeable, + mapping.mem_slot, + ) + .map_err(DeviceManagerError::MemoryManager) + }?; + } + + virtio_device.lock().unwrap().shutdown(); + } + + event!( + "vm", + "device-removed", + "id", + &id, + "bdf", + pci_device_bdf.to_string() + ); + + Ok(virtio_device) + } +} + fn create_mmio_allocators( start: u64, end: u64, @@ -1155,6 +1387,14 @@ fn create_mmio_allocators( } impl DeviceManager { + fn pci_hotplug_state(&self) -> std::sync::MutexGuard<'_, PciHotplugSharedState> { + self.pci_hotplug_state.lock().unwrap() + } + + fn push_bus_device(&self, bus_device: Arc) { + self.pci_hotplug_state().push_bus_device(bus_device); + } + #[allow(clippy::too_many_arguments)] pub fn new( io_bus: Arc, @@ -1287,9 +1527,26 @@ impl DeviceManager { )?); } + let mmio_regions = Arc::new(Mutex::new(Vec::new())); + let acpi_cpu_hotplug_controller = AcpiCpuHotplugController::new(&cpu_manager.lock().unwrap()); let acpi_cpu_hotplug_controller = Arc::new(Mutex::new(acpi_cpu_hotplug_controller)); + let pci_hotplug_state = Arc::new(Mutex::new(PciHotplugSharedState { + pci_segments: pci_segments.clone(), + selected_segment: 0, + bus_devices: Vec::new(), + vfio_container: None, + iommu_attached_devices: None, + virtio_mem_devices: Vec::new(), + })); + let acpi_pci_hotplug_controller = Arc::new(Mutex::new(AcpiPciHotplugController::new( + Arc::clone(&pci_hotplug_state), + Arc::clone(&address_manager), + Arc::clone(&memory_manager), + Arc::clone(&device_tree), + Arc::clone(&mmio_regions), + ))); if dynamic { let acpi_address = address_manager @@ -1348,15 +1605,12 @@ impl DeviceManager { cpu_manager, virtio_devices: VecDeque::new(), block_devices: vec![], - bus_devices: Vec::new(), device_id_cnt, msi_interrupt_manager, legacy_interrupt_manager: None, passthrough_device: None, - vfio_container: None, iommu_device: None, iommu_mapping: None, - iommu_attached_devices: None, pci_segments, device_tree, exit_evt, @@ -1372,11 +1626,10 @@ impl DeviceManager { .try_clone() .map_err(DeviceManagerError::EventFd)?, acpi_address, - selected_segment: 0, + pci_hotplug_state, serial_manager: None, console_resize_pipe: None, original_termios_opt: Arc::new(Mutex::new(None)), - virtio_mem_devices: Vec::new(), #[cfg(target_arch = "aarch64")] gpio_device: None, #[cfg(feature = "pvmemcontrol")] @@ -1392,20 +1645,23 @@ impl DeviceManager { acpi_platform_addresses: AcpiPlatformAddresses::default(), snapshot: snapshot.cloned(), rate_limit_groups, - mmio_regions: Arc::new(Mutex::new(Vec::new())), + mmio_regions, #[cfg(feature = "fw_cfg")] fw_cfg: None, #[cfg(feature = "ivshmem")] ivshmem_device: None, _acpi_cpu_hotplug_controller: acpi_cpu_hotplug_controller, + _acpi_pci_hotplug_controller: acpi_pci_hotplug_controller, }; let device_manager = Arc::new(Mutex::new(device_manager)); + let acpi_pci_hotplug_controller = + Arc::clone(&device_manager.lock().unwrap()._acpi_pci_hotplug_controller); address_manager .mmio_bus .insert( - Arc::clone(&device_manager) as Arc, + acpi_pci_hotplug_controller as Arc, acpi_address.0, DEVICE_MANAGER_ACPI_SIZE as u64, ) @@ -1493,8 +1749,7 @@ impl DeviceManager { #[cfg(not(target_arch = "riscv64"))] if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { let tpm_dev = self.add_tpm_device(&tpm.socket)?; - self.bus_devices - .push(Arc::clone(&tpm_dev) as Arc); + self.push_bus_device(Arc::clone(&tpm_dev) as Arc); } self.legacy_interrupt_manager = Some(legacy_interrupt_manager); @@ -1532,8 +1787,7 @@ impl DeviceManager { self.fw_cfg = Some(fw_cfg.clone()); - self.bus_devices - .push(Arc::clone(&fw_cfg) as Arc); + self.push_bus_device(Arc::clone(&fw_cfg) as Arc); #[cfg(target_arch = "x86_64")] self.address_manager @@ -1697,19 +1951,18 @@ impl DeviceManager { if let Some(iommu_device) = iommu_device { let dev_id = self.add_virtio_pci_device(iommu_device, &None, &iommu_id, 0, None, None)?; - self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); + self.pci_hotplug_state().iommu_attached_devices = + Some((dev_id, iommu_attached_devices)); } } for segment in &self.pci_segments { #[cfg(target_arch = "x86_64")] if let Some(pci_config_io) = segment.pci_config_io.as_ref() { - self.bus_devices - .push(Arc::clone(pci_config_io) as Arc); + self.push_bus_device(Arc::clone(pci_config_io) as Arc); } - self.bus_devices - .push(Arc::clone(&segment.pci_config_mmio) as Arc); + self.push_bus_device(Arc::clone(&segment.pci_config_mmio) as Arc); } Ok(()) @@ -1831,8 +2084,7 @@ impl DeviceManager { .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) .map_err(DeviceManagerError::BusError)?; - self.bus_devices - .push(Arc::clone(&interrupt_controller) as Arc); + self.push_bus_device(Arc::clone(&interrupt_controller) as Arc); // Fill the device tree with a new node. In case of restore, we // know there is nothing to do, so we can simply override the @@ -1870,8 +2122,7 @@ impl DeviceManager { vcpus_pause_signalled, ))); - self.bus_devices - .push(Arc::clone(&shutdown_device) as Arc); + self.push_bus_device(Arc::clone(&shutdown_device) as Arc); #[cfg(target_arch = "x86_64")] { @@ -1933,13 +2184,11 @@ impl DeviceManager { devices::acpi::GED_DEVICE_ACPI_SIZE as u64, ) .map_err(DeviceManagerError::BusError)?; - self.bus_devices - .push(Arc::clone(&ged_device) as Arc); + self.push_bus_device(Arc::clone(&ged_device) as Arc); let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); - self.bus_devices - .push(Arc::clone(&pm_timer_device) as Arc); + self.push_bus_device(Arc::clone(&pm_timer_device) as Arc); #[cfg(target_arch = "x86_64")] { @@ -1985,8 +2234,7 @@ impl DeviceManager { vcpus_pause_signalled.clone(), ))); - self.bus_devices - .push(Arc::clone(&i8042) as Arc); + self.push_bus_device(Arc::clone(&i8042) as Arc); self.address_manager .io_bus @@ -2014,8 +2262,7 @@ impl DeviceManager { vcpus_pause_signalled.clone(), ))); - self.bus_devices - .push(Arc::clone(&cmos) as Arc); + self.push_bus_device(Arc::clone(&cmos) as Arc); self.address_manager .io_bus @@ -2024,8 +2271,7 @@ impl DeviceManager { let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); - self.bus_devices - .push(Arc::clone(&fwdebug) as Arc); + self.push_bus_device(Arc::clone(&fwdebug) as Arc); self.address_manager .io_bus @@ -2035,8 +2281,7 @@ impl DeviceManager { // 0x80 debug port let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); - self.bus_devices - .push(Arc::clone(&debug_port) as Arc); + self.push_bus_device(Arc::clone(&debug_port) as Arc); self.address_manager .io_bus .insert(debug_port, 0x80, 0x1) @@ -2061,8 +2306,7 @@ impl DeviceManager { let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new())); - self.bus_devices - .push(Arc::clone(&rtc_device) as Arc); + self.push_bus_device(Arc::clone(&rtc_device) as Arc); let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; @@ -2103,8 +2347,7 @@ impl DeviceManager { .map_err(DeviceManagerError::RestoreGetState)?, ))); - self.bus_devices - .push(Arc::clone(&gpio_device) as Arc); + self.push_bus_device(Arc::clone(&gpio_device) as Arc); let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; @@ -2152,8 +2395,7 @@ impl DeviceManager { .iobase .map_or(debug_console::DEFAULT_PORT, |port| port as u64); - self.bus_devices - .push(Arc::clone(&debug_console) as Arc); + self.push_bus_device(Arc::clone(&debug_console) as Arc); self.address_manager .allocator @@ -2203,8 +2445,7 @@ impl DeviceManager { .map_err(DeviceManagerError::RestoreGetState)?, ))); - self.bus_devices - .push(Arc::clone(&serial) as Arc); + self.push_bus_device(Arc::clone(&serial) as Arc); self.address_manager .allocator @@ -2260,8 +2501,7 @@ impl DeviceManager { .map_err(DeviceManagerError::RestoreGetState)?, ))); - self.bus_devices - .push(Arc::clone(&serial) as Arc); + self.push_bus_device(Arc::clone(&serial) as Arc); let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; @@ -2323,8 +2563,7 @@ impl DeviceManager { .map_err(DeviceManagerError::RestoreGetState)?, ))); - self.bus_devices - .push(Arc::clone(&serial) as Arc); + self.push_bus_device(Arc::clone(&serial) as Arc); let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; @@ -3548,7 +3787,9 @@ impl DeviceManager { // if needed. virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); - self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); + self.pci_hotplug_state() + .virtio_mem_devices + .push(Arc::clone(&virtio_mem_device)); self.virtio_devices.push_back(MetaVirtioDevice { virtio_device: Arc::clone(&virtio_mem_device) @@ -3853,6 +4094,7 @@ impl DeviceManager { // the same VFIO container since we couldn't map/unmap memory for each // device. That's simply because the map/unmap operations happen at the // VFIO container level. + let shared_vfio_container = self.pci_hotplug_state().vfio_container.clone(); let vfio_container = if device_cfg.iommu { let vfio_container = self.create_vfio_container()?; @@ -3872,12 +4114,12 @@ impl DeviceManager { } vfio_container - } else if let Some(vfio_container) = &self.vfio_container { + } else if let Some(vfio_container) = &shared_vfio_container { Arc::clone(vfio_container) } else { let vfio_container = self.create_vfio_container()?; needs_dma_mapping = true; - self.vfio_container = Some(Arc::clone(&vfio_container)); + self.pci_hotplug_state().vfio_container = Some(Arc::clone(&vfio_container)); vfio_container }; @@ -3913,7 +4155,8 @@ impl DeviceManager { Arc::clone(&self.mmio_regions), )); - for virtio_mem_device in self.virtio_mem_devices.iter() { + let virtio_mem_devices = self.pci_hotplug_state().virtio_mem_devices.clone(); + for virtio_mem_device in &virtio_mem_devices { virtio_mem_device .lock() .unwrap() @@ -4031,7 +4274,7 @@ impl DeviceManager { .add_device(bdf.device() as u32, pci_device) .map_err(DeviceManagerError::AddPciDevice)?; - self.bus_devices.push(Arc::clone(&bus_device)); + self.push_bus_device(Arc::clone(&bus_device)); pci_bus .register_mapping( @@ -4126,7 +4369,8 @@ impl DeviceManager { let memory = self.memory_manager.lock().unwrap().guest_memory(); let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); - for virtio_mem_device in self.virtio_mem_devices.iter() { + let virtio_mem_devices = self.pci_hotplug_state().virtio_mem_devices.clone(); + for virtio_mem_device in &virtio_mem_devices { virtio_mem_device .lock() .unwrap() @@ -4264,7 +4508,8 @@ impl DeviceManager { } else { // Let every virtio-mem device handle the DMA map/unmap through the // DMA handler provided. - for virtio_mem_device in self.virtio_mem_devices.iter() { + let virtio_mem_devices = self.pci_hotplug_state().virtio_mem_devices.clone(); + for virtio_mem_device in &virtio_mem_devices { virtio_mem_device .lock() .unwrap() @@ -4528,7 +4773,7 @@ impl DeviceManager { } // Take care of updating the memory for VFIO PCI devices. - if let Some(vfio_container) = &self.vfio_container { + if let Some(vfio_container) = &self.pci_hotplug_state().vfio_container { // vfio_dma_map is unsound and ought to be marked as unsafe #[allow(unused_unsafe)] // SAFETY: GuestMemoryMmap guarantees that region points @@ -4605,7 +4850,8 @@ impl DeviceManager { let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; // Update the PCIU bitmap - self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); + self.pci_hotplug_state().pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= + 1 << bdf.device(); Ok(PciDeviceInfo { id: device_name, @@ -4622,7 +4868,8 @@ impl DeviceManager { let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; // Update the PCIU bitmap - self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); + self.pci_hotplug_state().pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= + 1 << bdf.device(); Ok(PciDeviceInfo { id: device_name, @@ -4726,225 +4973,24 @@ impl DeviceManager { } // Update the PCID bitmap - self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); + self.pci_hotplug_state().pci_segments[pci_segment_id as usize].pci_devices_down |= + 1 << pci_device_bdf.device(); Ok(()) } pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { - info!("Ejecting device_id = {device_id} on segment_id={pci_segment_id}"); - - // Convert the device ID into the corresponding b/d/f. - let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); - - // Give the PCI device ID back to the PCI bus. - self.pci_segments[pci_segment_id as usize] - .pci_bus + let virtio_device = self + ._acpi_pci_hotplug_controller .lock() .unwrap() - .put_device_id(device_id as usize) - .map_err(DeviceManagerError::PutPciDeviceId)?; - - let (pci_device_handle, id) = { - // Remove the device from the device tree along with its children. - let mut device_tree = self.device_tree.lock().unwrap(); - let pci_device_node = device_tree - .remove_node_by_pci_bdf(pci_device_bdf) - .ok_or(DeviceManagerError::MissingPciDevice)?; - - // For VFIO and vfio-user the PCI device id is the id. - // For virtio we overwrite it later as we want the id of the - // underlying device. - let mut id = pci_device_node.id; - let pci_device_handle = pci_device_node - .pci_device_handle - .ok_or(DeviceManagerError::MissingPciDevice)?; - if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { - // The virtio-pci device has a single child - if !pci_device_node.children.is_empty() { - assert_eq!(pci_device_node.children.len(), 1); - let child_id = &pci_device_node.children[0]; - id.clone_from(child_id); - } - } - for child in pci_device_node.children.iter() { - device_tree.remove(child); - } - - (pci_device_handle, id) - }; - - let mut iommu_attached = false; - if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices - && iommu_attached_devices.contains(&pci_device_bdf) - { - iommu_attached = true; - } - - let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { - // VirtioMemMappingSource::Container cleanup is handled by - // cleanup_vfio_container when the last VFIO device is removed. - PciDeviceHandle::Vfio(vfio_pci_device) => { - // Remove this device's MMIO regions from the DeviceManager's - // mmio_regions list. We match on UserMemoryRegion slot numbers - // rather than MmioRegion start addresses because move_bar() - // updates the device's region addresses but not the - // DeviceManager's cloned copies. - let device_regions = vfio_pci_device.lock().unwrap().mmio_regions().clone(); - let mut mmio_regions = self.mmio_regions.lock().unwrap(); - for device_region in &device_regions { - mmio_regions.retain(|x| !x.has_matching_slots(device_region)); - } - - ( - Arc::clone(&vfio_pci_device) as Arc>, - Arc::clone(&vfio_pci_device) as Arc, - None as Option>>, - false, - ) - } - PciDeviceHandle::Virtio(virtio_pci_device) => { - let dev = virtio_pci_device.lock().unwrap(); - let bar_addr = dev.config_bar_addr(); - for (event, addr) in dev.ioeventfds(bar_addr) { - let io_addr = IoEventAddress::Mmio(addr); - self.address_manager - .vm - .unregister_ioevent(event, &io_addr) - .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; - } - - if let Some(dma_handler) = dev.dma_handler() - && !iommu_attached - { - for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { - for region in zone.regions() { - let iova = region.start_addr().0; - let size = region.len(); - dma_handler - .unmap(iova, size) - .map_err(DeviceManagerError::VirtioDmaUnmap)?; - } - } - } + .eject_device(pci_segment_id, device_id)?; - ( - Arc::clone(&virtio_pci_device) as Arc>, - Arc::clone(&virtio_pci_device) as Arc, - Some(dev.virtio_device()), - dev.dma_handler().is_some() && !iommu_attached, - ) - } - PciDeviceHandle::VfioUser(vfio_user_pci_device) => { - let mut dev = vfio_user_pci_device.lock().unwrap(); - for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { - for region in zone.regions() { - dev.dma_unmap(region) - .map_err(DeviceManagerError::VfioUserDmaUnmap)?; - } - } - - ( - Arc::clone(&vfio_user_pci_device) as Arc>, - Arc::clone(&vfio_user_pci_device) as Arc, - None as Option>>, - true, - ) - } - }; - - if remove_dma_handler { - for virtio_mem_device in self.virtio_mem_devices.iter() { - let source = VirtioMemMappingSource::Device(pci_device_bdf.into()); - virtio_mem_device - .lock() - .unwrap() - .remove_dma_mapping_handler(&source) - .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; - } - } - - // Free the allocated BARs - pci_device - .lock() - .unwrap() - .free_bars( - &mut self.address_manager.allocator.lock().unwrap(), - &mut self.pci_segments[pci_segment_id as usize] - .mem32_allocator - .lock() - .unwrap(), - &mut self.pci_segments[pci_segment_id as usize] - .mem64_allocator - .lock() - .unwrap(), - ) - .map_err(DeviceManagerError::FreePciBars)?; - - // Remove the device from the PCI bus - self.pci_segments[pci_segment_id as usize] - .pci_bus - .lock() - .unwrap() - .remove_by_device(&pci_device) - .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; - - #[cfg(target_arch = "x86_64")] - // Remove the device from the IO bus - self.io_bus() - .remove_by_device(bus_device.as_ref()) - .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; - - // Remove the device from the MMIO bus - self.mmio_bus() - .remove_by_device(bus_device.as_ref()) - .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; - - // Remove the device from the list of BusDevice held by the - // DeviceManager. - self.bus_devices - .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); - - // Shutdown and remove the underlying virtio-device if present if let Some(virtio_device) = virtio_device { - for mapping in virtio_device.lock().unwrap().userspace_mappings() { - // SAFETY: userspace_mappings only has valid mappings. - // TODO: do not rely on the correctness of all the code in this file - // for this to hold. - unsafe { - self.memory_manager - .lock() - .unwrap() - .remove_userspace_mapping( - mapping.addr.raw_value(), - mapping.mapping.size(), - mapping.mapping.as_ptr() as _, - mapping.mergeable, - mapping.mem_slot, - ) - .map_err(DeviceManagerError::MemoryManager) - }?; - } - - virtio_device.lock().unwrap().shutdown(); - self.virtio_devices .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); } - event!( - "vm", - "device-removed", - "id", - &id, - "bdf", - pci_device_bdf.to_string() - ); - - // At this point, the device has been removed from all the list and - // buses where it was stored. At the end of this function, after - // any_device, bus_device and pci_device are released, the actual - // device will be dropped. Ok(()) } @@ -4973,7 +5019,8 @@ impl DeviceManager { )?; // Update the PCIU bitmap - self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); + self.pci_hotplug_state().pci_segments[handle.pci_segment as usize].pci_devices_up |= + 1 << bdf.device(); Ok(PciDeviceInfo { id: handle.id, bdf }) } @@ -5142,8 +5189,8 @@ impl DeviceManager { .map_err(DeviceManagerError::PowerButtonNotification); } - pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec)> { - &self.iommu_attached_devices + pub fn iommu_attached_devices(&self) -> Option<(PciBdf, Vec)> { + self.pci_hotplug_state().iommu_attached_devices.clone() } fn validate_identifier(&self, id: &Option) -> DeviceManagerResult<()> { @@ -5164,14 +5211,6 @@ impl DeviceManager { &self.acpi_platform_addresses } - fn cleanup_vfio_container(&mut self) { - // Drop the 'vfio container' instance when "Self" is the only reference - if let Some(1) = self.vfio_container.as_ref().map(Arc::strong_count) { - debug!("Drop 'vfio container' given no active 'vfio devices'."); - self.vfio_container = None; - } - } - /// Helps the environment converge quickly after a live migration by /// prompting devices to advertise the VM from its new host. /// @@ -5671,38 +5710,36 @@ const PCID_FIELD_SIZE: usize = 4; const B0EJ_FIELD_SIZE: usize = 4; const PSEG_FIELD_SIZE: usize = 4; -impl BusDevice for DeviceManager { +impl BusDevice for AcpiPciHotplugController { fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { + let mut shared_state = self.shared_state.lock().unwrap(); + let selected_segment = shared_state.selected_segment; match offset { PCIU_FIELD_OFFSET => { assert!(data.len() == PCIU_FIELD_SIZE); data.copy_from_slice( - &self.pci_segments[self.selected_segment] + &shared_state.pci_segments[selected_segment] .pci_devices_up .to_le_bytes(), ); - // Clear the PCIU bitmap - self.pci_segments[self.selected_segment].pci_devices_up = 0; + shared_state.pci_segments[selected_segment].pci_devices_up = 0; } PCID_FIELD_OFFSET => { assert!(data.len() == PCID_FIELD_SIZE); data.copy_from_slice( - &self.pci_segments[self.selected_segment] + &shared_state.pci_segments[selected_segment] .pci_devices_down .to_le_bytes(), ); - // Clear the PCID bitmap - self.pci_segments[self.selected_segment].pci_devices_down = 0; + shared_state.pci_segments[selected_segment].pci_devices_down = 0; } B0EJ_FIELD_OFFSET => { assert!(data.len() == B0EJ_FIELD_SIZE); - // Always return an empty bitmap since the eject is always - // taken care of right away during a write access. data.fill(0); } PSEG_FIELD_OFFSET => { assert_eq!(data.len(), PSEG_FIELD_SIZE); - data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); + data.copy_from_slice(&(selected_segment as u32).to_le_bytes()); } _ => error!("Accessing unknown location at base 0x{base:x}, offset 0x{offset:x}"), } @@ -5714,16 +5751,16 @@ impl BusDevice for DeviceManager { match offset { B0EJ_FIELD_OFFSET => { assert!(data.len() == B0EJ_FIELD_SIZE); + let selected_segment = self.shared_state.lock().unwrap().selected_segment as u16; let mut data_array: [u8; 4] = [0, 0, 0, 0]; data_array.copy_from_slice(data); let mut slot_bitmap = u32::from_le_bytes(data_array); while slot_bitmap > 0 { let slot_id = slot_bitmap.trailing_zeros(); - if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { + if let Err(e) = self.eject_device(selected_segment, slot_id as u8) { error!("Failed ejecting device {slot_id}: {e:?}"); } - self.cleanup_vfio_container(); slot_bitmap &= !(1 << slot_id); } } @@ -5732,15 +5769,16 @@ impl BusDevice for DeviceManager { let mut data_array: [u8; 4] = [0, 0, 0, 0]; data_array.copy_from_slice(data); let selected_segment = u32::from_le_bytes(data_array) as usize; - if selected_segment >= self.pci_segments.len() { + let mut shared_state = self.shared_state.lock().unwrap(); + if selected_segment >= shared_state.pci_segments.len() { error!( "Segment selection out of range: {} >= {}", selected_segment, - self.pci_segments.len() + shared_state.pci_segments.len() ); return None; } - self.selected_segment = selected_segment; + shared_state.selected_segment = selected_segment; } _ => error!("Accessing unknown location at base 0x{base:x}, offset 0x{offset:x}"), } diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index 9b0d88e141..9c24a07697 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -23,6 +23,7 @@ use vm_device::BusDeviceSync; use crate::device_manager::{AddressManager, DeviceManagerError, DeviceManagerResult}; +#[derive(Clone)] pub(crate) struct PciSegment { pub(crate) id: u16, pub(crate) pci_bus: Arc>, diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 96928b4c1f..2b1cea8ce5 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -1793,8 +1793,7 @@ impl Vm { .lock() .unwrap() .iommu_attached_devices() - .as_ref() - .map(|(v, _)| *v); + .map(|(v, _)| v); let vgic = self .device_manager