From 33a511900046e647ccaecf21c6a776c40188291a Mon Sep 17 00:00:00 2001 From: John Starks Date: Thu, 21 May 2026 22:17:41 +0000 Subject: [PATCH 1/4] vmm_core: decouple chipset MMIO ranges from MemoryLayout The chipset MMIO ranges (low below 4 GiB, high above RAM) are a logical concept of the chipset configuration, but they were previously read by positional indexing into MemoryLayout::mmio(). This was fragile: it required exactly two MMIO entries, scattered assertions and error paths enforced that invariant, and the coupling prevented evolving the memory layout's MMIO accounting independently of the chipset's view of MMIO. Introduce a ChipsetMmioRanges struct that carries the chipset low, high, and VTL2 MMIO ranges explicitly, and thread it through the loader, ACPI builder, DSDT construction, MTRR computation, firmware_pcat, and IGVM/UEFI/Linux boot paths. This removes all the mmio().len() >= 2 assertions and error variants, since the ranges are now provided directly rather than reverse-engineered from the memory layout. As a drive-by, the VTL2 framebuffer GPA allocation moves into the memory layout resolver so it participates in the same non-overlapping placement as other post-MMIO ranges, and build_acpi_tables's closure no longer receives a &MemoryLayout it didn't need. --- openhcl/underhill_core/src/loader/mod.rs | 21 ++- openhcl/underhill_core/src/worker.rs | 82 ++++++--- openvmm/openvmm_core/src/worker/dispatch.rs | 160 +++++++----------- .../openvmm_core/src/worker/memory_layout.rs | 79 ++++++--- .../src/worker/vm_loaders/igvm.rs | 37 ++-- .../src/worker/vm_loaders/linux.rs | 23 +-- .../src/worker/vm_loaders/uefi.rs | 19 +-- vm/acpi/src/dsdt.rs | 8 +- vm/devices/firmware/firmware_pcat/Cargo.toml | 4 +- vm/devices/firmware/firmware_pcat/src/lib.rs | 21 ++- vm/loader/src/common.rs | 32 ++-- vmm_core/src/acpi_builder.rs | 10 +- 12 files changed, 261 insertions(+), 235 deletions(-) diff --git a/openhcl/underhill_core/src/loader/mod.rs b/openhcl/underhill_core/src/loader/mod.rs index f737834ecc..03fe60f258 100644 --- a/openhcl/underhill_core/src/loader/mod.rs +++ b/openhcl/underhill_core/src/loader/mod.rs @@ -5,6 +5,7 @@ use self::vtl2_config::RuntimeParameters; use crate::loader::vtl0_config::LinuxInfo; +use crate::worker::ChipsetMmioRanges; use crate::worker::FirmwareType; use cvm_tracing::CVM_ALLOWED; use guest_emulation_transport::api::platform_settings::DevicePlatformSettings; @@ -78,9 +79,6 @@ pub enum Error { InvalidAcpiTableLength, #[error("invalid acpi table: unknown header signature {0:?}")] InvalidAcpiTableSignature([u8; 4]), - #[cfg(guest_arch = "x86_64")] - #[error("acpi tables require at least two mmio ranges")] - UnsupportedMmio, #[cfg(guest_arch = "aarch64")] #[error("expected GICv3 topology")] ExpectedGicV3, @@ -115,6 +113,7 @@ pub fn load( config: Config, caps: &virt::PartitionCapabilities, isolated: bool, + chipset_mmio: &ChipsetMmioRanges, ) -> Result { let context = match load_kind { LoadKind::None => { @@ -137,6 +136,7 @@ pub fn load( platform_config, caps, isolated, + chipset_mmio, )?; uefi_info.vp_context.clone() } @@ -188,6 +188,7 @@ pub fn load( processor_topology, platform_config, chipset_capabilities, + chipset_mmio, kernel_range: *kernel_range, kernel_entrypoint: *kernel_entrypoint, initrd: *initrd, @@ -236,6 +237,7 @@ struct LoadLinuxParams<'a> { processor_topology: &'a ProcessorTopology, platform_config: &'a DevicePlatformSettings, chipset_capabilities: VmChipsetCapabilities, + chipset_mmio: &'a ChipsetMmioRanges, /// The region of memory used by the kernel. kernel_range: MemoryRange, /// The entrypoint of the kernel. @@ -261,6 +263,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { processor_topology, platform_config, chipset_capabilities, + chipset_mmio, kernel_range, kernel_entrypoint, initrd, @@ -287,11 +290,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { }, }; - if mem_layout.mmio().len() < 2 { - return Err(Error::UnsupportedMmio); - } - - let acpi_tables = acpi_builder.build_acpi_tables(ACPI_BASE, |mem_layout, dsdt| { + let acpi_tables = acpi_builder.build_acpi_tables(ACPI_BASE, |dsdt| { dsdt.add_apic(); // Add serial ports if enabled. @@ -315,7 +314,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { ); } - dsdt.add_mmio_module(mem_layout.mmio()[0], mem_layout.mmio()[1]); + dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); // TODO: change this once PCI is running in underhill dsdt.add_vmbus(false, None); dsdt.add_rtc(); @@ -425,6 +424,7 @@ pub fn write_uefi_config( platform_config: &DevicePlatformSettings, caps: &virt::PartitionCapabilities, isolated: bool, + chipset_mmio: &ChipsetMmioRanges, ) -> Result<(), Error> { use guest_emulation_transport::api::platform_settings::UefiConsoleMode; @@ -524,8 +524,7 @@ pub fn write_uefi_config( ) .add_raw( config::BlobStructureType::MmioRanges, - mem_layout - .mmio() + [chipset_mmio.low, chipset_mmio.high] .iter() .map(|range| config::Mmio { mmio_page_number_start: range.start() / HV_PAGE_SIZE, diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index e2f08e18bd..70fc03ad54 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -1158,13 +1158,28 @@ struct BuiltVtl0MemoryLayout { complete_memory_layout: MemoryLayout, } +/// Chipset MMIO ranges. +#[derive(Debug, Copy, Clone)] +pub(crate) struct ChipsetMmioRanges { + /// Chipset low MMIO range (below 4 GB) for VMOD/PCI0 _CRS. Always at + /// least the architectural reserved zone (LAPIC, IOAPIC, TPM, ...). + pub low: MemoryRange, + /// Chipset high MMIO range (above RAM) for VMOD/PCI0 _CRS. `EMPTY` when + /// no chipset high MMIO is configured. + pub high: MemoryRange, +} + /// Build the VTL0 memory map after carving out any memory requested for shared /// visibility memory to be used by VTL2. fn build_vtl0_memory_layout( vtl0_memory_map: Vec<(MemoryRangeWithNode, MemoryMapEntryType)>, - mmio: &[MemoryRange], + chipset_mmio: &ChipsetMmioRanges, mut shared_pool_size: u64, ) -> anyhow::Result { + let mmio: Vec = [chipset_mmio.low, chipset_mmio.high] + .into_iter() + .filter(|r| !r.is_empty()) + .collect(); // Allocate shared_pool memory starting from the last (top of memory) // continuing downward until the size is covered. // @@ -1220,13 +1235,13 @@ fn build_vtl0_memory_layout( .collect::>(); let vtl0_memory_layout = - MemoryLayout::new_from_ranges(&memory, mmio).context("invalid memory layout")?; + MemoryLayout::new_from_ranges(&memory, &mmio).context("invalid memory layout")?; let complete_memory = vtl0_memory_map .iter() .map(|(entry, _typ)| entry.clone()) .collect::>(); - let complete_memory_layout = MemoryLayout::new_from_ranges(&complete_memory, mmio) + let complete_memory_layout = MemoryLayout::new_from_ranges(&complete_memory, &mmio) .context("invalid complete memory layout")?; tracing::info!( @@ -1694,20 +1709,34 @@ async fn new_underhill_vm( anyhow::bail!("cannot run the VPCI relay without the VMBus relay"); } - let mut vtl0_mmio; - let (vtl0_mmio, vpci_relay_mmio) = if enable_vpci_relay { - // Carve out enough VTL0 MMIO space for 64 devices. + // Construct chipset MMIO ranges from the positional convention in the + // device tree: [0] = low (below 4 GiB), [1] = high (above RAM). + let mut chipset_mmio = ChipsetMmioRanges { + low: boot_info + .vtl0_mmio + .first() + .copied() + .unwrap_or(MemoryRange::EMPTY), + high: boot_info + .vtl0_mmio + .get(1) + .copied() + .unwrap_or(MemoryRange::EMPTY), + }; + + let vpci_relay_mmio = if enable_vpci_relay { + // Carve out enough VTL0 MMIO space from the high range for 64 devices. let required_len = 64 * vpci_relay::VPCI_RELAY_MMIO_PER_DEVICE; - vtl0_mmio = boot_info.vtl0_mmio.clone(); - if vtl0_mmio.last().is_none_or(|r| r.len() < required_len) { + if chipset_mmio.high.is_empty() || chipset_mmio.high.len() < required_len { anyhow::bail!("too little VTL0 MMIO space to take for the VPCI relay"); } - let r = vtl0_mmio.last().unwrap(); - let (rest, vpci) = r.split_at_offset(r.len() - required_len); - *vtl0_mmio.last_mut().unwrap() = rest; - (&vtl0_mmio, vpci) + let (rest, vpci) = chipset_mmio + .high + .split_at_offset(chipset_mmio.high.len() - required_len); + chipset_mmio.high = rest; + vpci } else { - (&boot_info.vtl0_mmio, MemoryRange::EMPTY) + MemoryRange::EMPTY }; let BuiltVtl0MemoryLayout { @@ -1715,7 +1744,7 @@ async fn new_underhill_vm( vtl0_memory_layout: mem_layout, shared_pool, complete_memory_layout, - } = build_vtl0_memory_layout(vtl0_memory_map, vtl0_mmio, shared_pool_size)?; + } = build_vtl0_memory_layout(vtl0_memory_map, &chipset_mmio, shared_pool_size)?; // Determine if x2apic is supported so that the topology matches // reality. @@ -2446,6 +2475,8 @@ async fn new_underhill_vm( let config = firmware_pcat::config::PcatBiosConfig { processor_topology: processor_topology.clone(), mem_layout: mem_layout.clone(), + chipset_low_mmio: chipset_mmio.low, + chipset_high_mmio: chipset_mmio.high, srat: acpi_builder.build_srat(), hibernation_enabled: dps.general.hibernation_enabled, initial_generation_id, @@ -3002,9 +3033,12 @@ async fn new_underhill_vm( // By default on aarch64 send all MMIO accesses to the host. None } else { - let mut untrusted_mmio_ranges: Vec<_> = mem_layout.mmio().to_vec(); + let chipset_mmio_ranges = [chipset_mmio.low, chipset_mmio.high] + .into_iter() + .filter(|r| !r.is_empty()); + let mut untrusted_mmio_ranges: Vec<_> = chipset_mmio_ranges.clone().collect(); if vtom > 0 { - untrusted_mmio_ranges.extend(mem_layout.mmio().iter().map(|range| { + untrusted_mmio_ranges.extend(chipset_mmio_ranges.map(|range| { MemoryRange::new((range.start() + vtom)..(range.end() + vtom)) })); } @@ -3592,6 +3626,7 @@ async fn new_underhill_vm( load_kind, &dps, isolation.is_isolated(), + &chipset_mmio, ) .instrument(tracing::info_span!("load_firmware", CVM_ALLOWED)) .await?; @@ -3871,6 +3906,7 @@ async fn load_firmware( load_kind: LoadKind, dps: &DevicePlatformSettings, isolated: bool, + chipset_mmio: &ChipsetMmioRanges, ) -> Result<(), anyhow::Error> { let cmdline_append = match cmdline_append { Some(cmdline) => CString::new(cmdline.as_bytes()).context("bad command line")?, @@ -3891,19 +3927,19 @@ async fn load_firmware( loader_config, caps, isolated, + chipset_mmio, ) .context("failed to load firmware")?; #[cfg(guest_arch = "x86_64")] let registers = { let crate::loader::VpContext::Vbs(mut registers) = vtl0_vp_context; - registers.extend( - loader::common::compute_variable_mtrrs( - mem_layout, - partition.caps().physical_address_width, - ) - .context("Failed to compute variable mtrrs")?, - ); + registers.extend(loader::common::compute_variable_mtrrs( + mem_layout, + partition.caps().physical_address_width, + chipset_mmio.low, + chipset_mmio.high, + )); registers }; #[cfg(guest_arch = "aarch64")] diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index 7acf844917..4a13b3e9c3 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -9,6 +9,7 @@ use crate::emuplat; use crate::partition::BindHvliteVp; use crate::partition::HvlitePartition; use crate::vmgs_non_volatile_store::HvLiteVmgsNonVolatileStore; +use crate::worker::memory_layout::ChipsetMmioRanges; use crate::worker::memory_layout::MemoryLayoutInput; use crate::worker::memory_layout::ResolvedPcieRootComplexRanges; use crate::worker::memory_layout::resolve_memory_layout; @@ -415,9 +416,8 @@ pub(crate) struct InitializedVm { mem_layout: MemoryLayout, resolved_pcie_root_complex_ranges: Vec, virtio_mmio_region: MemoryRange, - chipset_low_mmio: MemoryRange, - chipset_high_mmio: MemoryRange, - vtl2_chipset_mmio: MemoryRange, + chipset_mmio: ChipsetMmioRanges, + vtl2_framebuffer_gpa_base: Option, #[cfg(guest_arch = "aarch64")] resolved_smmu_resources: Vec, processor_topology: ProcessorTopology, @@ -720,12 +720,8 @@ struct LoadedVmInner { virtio_mmio_region: MemoryRange, #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))] virtio_mmio_irq: u32, - /// Chipset low MMIO range for VMOD/PCI0 _CRS. - chipset_low_mmio: MemoryRange, - /// Chipset high MMIO range for VMOD/PCI0 _CRS. - chipset_high_mmio: MemoryRange, - /// VTL2-private chipset MMIO range for VTL2 VMBus. - vtl2_chipset_mmio: MemoryRange, + /// Resolved chipset MMIO ranges. + chipset_mmio: ChipsetMmioRanges, /// ((device, function), interrupt) #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))] pci_legacy_interrupts: Vec<((u8, Option), u32)>, @@ -1003,6 +999,14 @@ impl InitializedVm { #[cfg(not(guest_arch = "aarch64"))] let smmu_count = 0; + let vtl2_framebuffer_size = if cfg.vtl2_gfx { + cfg.framebuffer + .as_ref() + .context("no framebuffer configured")? + .len() as u64 + } else { + 0 + }; let resolved_layout = resolve_memory_layout(MemoryLayoutInput { mem_size: cfg.memory.mem_size, numa_mem_sizes: cfg.memory.numa_mem_sizes.as_deref(), @@ -1011,15 +1015,14 @@ impl InitializedVm { virtio_mmio_count, smmu_count, vtl2_layout, + vtl2_framebuffer_size, physical_address_size, }) .context("invalid memory configuration")?; let mem_layout = resolved_layout.memory_layout; let resolved_pcie_root_complex_ranges = resolved_layout.pcie_root_complex_ranges; let virtio_mmio_region = resolved_layout.virtio_mmio_region; - let chipset_low_mmio = resolved_layout.chipset_low_mmio; - let chipset_high_mmio = resolved_layout.chipset_high_mmio; - let vtl2_chipset_mmio = resolved_layout.vtl2_chipset_mmio; + let chipset_mmio = resolved_layout.chipset_mmio; // Combine SMMU MMIO ranges with SPI layout. #[cfg(guest_arch = "aarch64")] @@ -1156,9 +1159,8 @@ impl InitializedVm { mem_layout, resolved_pcie_root_complex_ranges, virtio_mmio_region, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, + vtl2_framebuffer_gpa_base: resolved_layout.vtl2_framebuffer_gpa_base, #[cfg(guest_arch = "aarch64")] resolved_smmu_resources, processor_topology, @@ -1189,9 +1191,8 @@ impl InitializedVm { mem_layout, resolved_pcie_root_complex_ranges, virtio_mmio_region, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, + vtl2_framebuffer_gpa_base, #[cfg(guest_arch = "aarch64")] resolved_smmu_resources, processor_topology, @@ -1404,6 +1405,8 @@ impl InitializedVm { firmware_pcat::config::PcatBiosConfig { processor_topology: processor_topology.clone(), mem_layout: mem_layout.clone(), + chipset_low_mmio: chipset_mmio.low, + chipset_high_mmio: chipset_mmio.high, srat, hibernation_enabled: false, @@ -1452,25 +1455,9 @@ impl InitializedVm { _ => {} }; - let vtl2_framebuffer_gpa_base = if cfg.vtl2_gfx { - // calculate a safe place to put the framebuffer mapping in GPA space - // this places it after the end of ram at the first place it won't overlap with MMIO - let len = cfg - .framebuffer - .as_ref() - .context("no framebuffer configured")? - .len(); - let mut gpa = mem_layout.end_of_ram(); - for mmio in mem_layout.mmio() { - if gpa < mmio.end() && mmio.start() < gpa + len as u64 { - gpa = mmio.end(); - } - } + if let Some(gpa) = vtl2_framebuffer_gpa_base { tracing::debug!("Vtl2 framebuffer gpa base: {:#x}", gpa); - Some(gpa) - } else { - None - }; + } let state_units = StateUnits::new(); @@ -2683,9 +2670,7 @@ impl InitializedVm { load_mode: cfg.load_mode, virtio_mmio_region, virtio_mmio_irq, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, pci_legacy_interrupts, igvm_file, next_igvm_file: None, @@ -2784,20 +2769,17 @@ impl LoadedVmInner { cmdline, mem_layout: &self.mem_layout, }; - if custom_dsdt.is_none() && self.mem_layout.mmio().len() < 2 { - anyhow::bail!("at least two mmio regions are required"); - } let regs = super::vm_loaders::linux::load_linux_x86(&kernel_config, &self.gm, |gpa| { let tables = if let Some(dsdt) = custom_dsdt { acpi_builder.build_acpi_tables_custom_dsdt(gpa, dsdt) } else { - acpi_builder.build_acpi_tables(gpa, |mem_layout, dsdt| { + acpi_builder.build_acpi_tables(gpa, |dsdt| { add_devices_to_dsdt_x64( - mem_layout, dsdt, &self.chipset_cfg, enable_serial, + &self.chipset_mmio, self.virtio_mmio_region, self.virtio_mmio_irq, &self.pci_legacy_interrupts, @@ -2831,11 +2813,10 @@ impl LoadedVmInner { mem_layout: &self.mem_layout, }; - let with_hv = self.hypervisor_cfg.with_hv; let build_acpi = if boot_mode == LinuxDirectBootMode::Acpi { Some(|rsdp_gpa: u64| { - acpi_builder.build_acpi_tables(rsdp_gpa, |mem_layout, dsdt| { - add_devices_to_dsdt_arm64(mem_layout, dsdt, enable_serial, with_hv) + acpi_builder.build_acpi_tables(rsdp_gpa, |dsdt| { + add_devices_to_dsdt_arm64(dsdt, enable_serial, &self.chipset_mmio) }) }) } else { @@ -2849,6 +2830,7 @@ impl LoadedVmInner { &self.processor_topology, &self.pcie_host_bridges, &self.smmu_configs, + &self.chipset_mmio, build_acpi, )?; @@ -2891,6 +2873,7 @@ impl LoadedVmInner { &self.mem_layout, &self.pcie_host_bridges, load_settings, + &self.chipset_mmio, &madt, &srat, mcfg.as_deref(), @@ -2935,9 +2918,7 @@ impl LoadedVmInner { with_vmbus_redirect: self.vmbus_redirect, com_serial, entropy: Some(&entropy), - chipset_low_mmio: self.chipset_low_mmio, - chipset_high_mmio: self.chipset_high_mmio, - vtl2_chipset_mmio: self.vtl2_chipset_mmio, + chipset_mmio: self.chipset_mmio, }; super::vm_loaders::igvm::load_igvm(params)? } @@ -2951,13 +2932,12 @@ impl LoadedVmInner { // VTL2 will setup MTRRs for VTL0 if needed. #[cfg(guest_arch = "x86_64")] if self.hypervisor_cfg.with_vtl2.is_none() { - regs.extend( - loader::common::compute_variable_mtrrs( - &self.mem_layout, - self.partition.caps().physical_address_width, - ) - .context("failed to compute variable mtrrs")?, - ); + regs.extend(loader::common::compute_variable_mtrrs( + &self.mem_layout, + self.partition.caps().physical_address_width, + self.chipset_mmio.low, + self.chipset_mmio.high, + )); } // Only set initial page visibility on isolated partitions. @@ -3554,10 +3534,10 @@ impl LoadedVm { #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))] fn add_devices_to_dsdt_x64( - mem_layout: &MemoryLayout, dsdt: &mut dsdt::Dsdt, cfg: &BaseChipsetManifest, serial_uarts: bool, + chipset_mmio: &ChipsetMmioRanges, virtio_mmio_region: MemoryRange, virtio_mmio_irq: u32, pci_legacy_interrupts: &[((u8, Option), u32)], // ((device, function), interrupt) @@ -3579,36 +3559,29 @@ fn add_devices_to_dsdt_x64( } } - assert!( - mem_layout.mmio().len() >= 2, - "the DSDT describes two MMIO regions" - ); - let low_mmio_gap = mem_layout.mmio()[0]; - let high_mmio_gap = mem_layout.mmio()[1]; - // Virtio-mmio devices are allocated as a contiguous region by the memory // layout resolver. Each 4 KiB slot is a separate device. - { - for i in 0..virtio_mmio_region.page_count_4k() { - let slot_base = virtio_mmio_region.start() + i * HV_PAGE_SIZE; - let mut device = dsdt::Device::new(format!("\\_SB.VI{i:02}").as_bytes()); - device.add_object(&dsdt::NamedString::new(b"_HID", b"LNRO0005")); - device.add_object(&dsdt::NamedInteger::new(b"_UID", i)); - let mut crs = dsdt::CurrentResourceSettings::new(); - crs.add_resource(&dsdt::QwordMemory::new(slot_base, HV_PAGE_SIZE)); - let mut intr = dsdt::Interrupt::new(virtio_mmio_irq); - intr.is_edge_triggered = false; - crs.add_resource(&intr); - device.add_object(&crs); - dsdt.add_object(&device); - } + for i in 0..virtio_mmio_region.page_count_4k() { + let slot_base = virtio_mmio_region.start() + i * HV_PAGE_SIZE; + let mut device = dsdt::Device::new(format!("\\_SB.VI{i:02}").as_bytes()); + device.add_object(&dsdt::NamedString::new(b"_HID", b"LNRO0005")); + device.add_object(&dsdt::NamedInteger::new(b"_UID", i)); + let mut crs = dsdt::CurrentResourceSettings::new(); + crs.add_resource(&dsdt::QwordMemory::new(slot_base, HV_PAGE_SIZE)); + let mut intr = dsdt::Interrupt::new(virtio_mmio_irq); + intr.is_edge_triggered = false; + crs.add_resource(&intr); + device.add_object(&crs); + dsdt.add_object(&device); } + // The chipset MMIO module or PCI bus describes the chipset low/high + // MMIO regions to the guest. if cfg.with_generic_pci_bus || cfg.with_i440bx_host_pci_bridge { // TODO: actually plumb through legacy PCI interrupts - dsdt.add_pci(low_mmio_gap, high_mmio_gap, pci_legacy_interrupts); + dsdt.add_pci(chipset_mmio.low, chipset_mmio.high, pci_legacy_interrupts); } else { - dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap); + dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); } dsdt.add_vmbus( @@ -3620,10 +3593,9 @@ fn add_devices_to_dsdt_x64( #[cfg(guest_arch = "aarch64")] fn add_devices_to_dsdt_arm64( - mem_layout: &MemoryLayout, dsdt: &mut dsdt::Dsdt, enable_serial: bool, - with_hv: bool, + chipset_mmio: &ChipsetMmioRanges, ) { // VMBus GIC INTID (PPI 2 = INTID 16 + 2 = 18), matching the DT path. const VMBUS_INTID: u32 = openvmm_defs::config::DEFAULT_VMBUS_PPI; @@ -3635,22 +3607,12 @@ fn add_devices_to_dsdt_arm64( const PL011_SERIAL0_GSIV: u32 = 33; const PL011_SERIAL1_GSIV: u32 = 34; - if with_hv { - // Internal invariant: the memory layout for ARM64 with HV always has - // at least two MMIO gaps (low + high). This is configured by OpenVMM - // itself, not by guest input. - assert!( - mem_layout.mmio().len() >= 2, - "need at least two MMIO regions" - ); - let low_mmio_gap = mem_layout.mmio()[0]; - let high_mmio_gap: MemoryRange = mem_layout.mmio()[1]; - dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap); - // VMBus on ARM64 ACPI needs a per-CPU interrupt (PPI) in _CRS. - // Always place under VMOD, not PCI0 — ARM64 doesn't use the x86 - // PCI0 DSDT node. - dsdt.add_vmbus(false, Some(VMBUS_INTID)); - } + dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); + + // VMBus on ARM64 ACPI needs a per-CPU interrupt (PPI) in _CRS. + // Always place under VMOD, not PCI0 — ARM64 doesn't use the x86 + // PCI0 DSDT node. + dsdt.add_vmbus(false, Some(VMBUS_INTID)); if enable_serial { dsdt.add_sbsa_uart( diff --git a/openvmm/openvmm_core/src/worker/memory_layout.rs b/openvmm/openvmm_core/src/worker/memory_layout.rs index 0ecd36c445..c0b68d40f5 100644 --- a/openvmm/openvmm_core/src/worker/memory_layout.rs +++ b/openvmm/openvmm_core/src/worker/memory_layout.rs @@ -49,6 +49,20 @@ const PCIE_ECAM_BYTES_PER_BUS: u64 = 32 * 8 * 4096; /// value, independent of any individual root complex's configuration. const PCIE_ECAM_MIN_ADDRESS: u64 = 256 * 1024 * 1024; +/// Resolved chipset MMIO ranges produced by the memory layout engine. +#[derive(Debug, Copy, Clone)] +pub(crate) struct ChipsetMmioRanges { + /// Chipset low MMIO range (below 4 GB) for VMOD/PCI0 _CRS. Always at + /// least the architectural reserved zone (LAPIC, IOAPIC, TPM, ...). + pub low: MemoryRange, + /// Chipset high MMIO range (above RAM) for VMOD/PCI0 _CRS. `EMPTY` when + /// no chipset high MMIO is configured. + pub high: MemoryRange, + /// VTL2-private chipset MMIO range, reported to VTL2 VMBus via the device + /// tree. `EMPTY` when VTL2 is not configured or has no chipset MMIO. + pub vtl2: MemoryRange, +} + #[derive(Debug)] pub(super) struct ResolvedMemoryLayout { pub memory_layout: MemoryLayout, @@ -57,15 +71,11 @@ pub(super) struct ResolvedMemoryLayout { /// 4 KiB, indexed from the start of the region. `EMPTY` when no /// virtio-mmio devices are configured. pub virtio_mmio_region: MemoryRange, - /// Chipset low MMIO range (below 4 GB) for VMOD/PCI0 _CRS. Always at - /// least the architectural reserved zone (LAPIC, IOAPIC, TPM, ...). - pub chipset_low_mmio: MemoryRange, - /// Chipset high MMIO range (above RAM) for VMOD/PCI0 _CRS. `EMPTY` when - /// no chipset high MMIO is configured. - pub chipset_high_mmio: MemoryRange, - /// VTL2-private chipset MMIO range, reported to VTL2 VMBus via the device - /// tree. `EMPTY` when VTL2 is not configured or has no chipset MMIO. - pub vtl2_chipset_mmio: MemoryRange, + /// Resolved chipset MMIO ranges. + pub chipset_mmio: ChipsetMmioRanges, + /// Resolved VTL2 framebuffer GPA base. `None` when VTL2 graphics is not + /// configured. + pub vtl2_framebuffer_gpa_base: Option, /// Resolved MMIO ranges for SMMUv3 instances, one per configured SMMU. /// Each range is `SMMU_SIZE` bytes. Empty when no SMMUs are configured. #[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] @@ -102,6 +112,10 @@ pub(super) struct MemoryLayoutInput<'a> { /// Optional IGVM VTL2 private-memory request. This is allocated after all /// VTL0-visible RAM and MMIO and is carried separately from ordinary RAM. pub vtl2_layout: Option, + /// Size in bytes of the VTL2 framebuffer mapping. When non-zero, a + /// `PostMmio` allocation is created and the resolved GPA is returned in + /// `ResolvedMemoryLayout::vtl2_framebuffer_gpa_base`. + pub vtl2_framebuffer_size: u64, /// Host-supported physical address width used only after allocation. The /// allocator computes the smallest layout it can; host fit is validation. pub physical_address_size: u8, @@ -333,6 +347,19 @@ pub(super) fn resolve_memory_layout( ); } + // VTL2 framebuffer: a page-aligned PostMmio allocation so the GPA does + // not overlap RAM or any MMIO range. + let mut vtl2_framebuffer_range = MemoryRange::EMPTY; + if input.vtl2_framebuffer_size != 0 { + builder.request( + "vtl2-framebuffer", + &mut vtl2_framebuffer_range, + input.vtl2_framebuffer_size, + PAGE_SIZE, + Placement::PostMmio, + ); + } + let placed_ranges = builder .allocate() .context("allocating memory layout ranges")?; @@ -436,9 +463,16 @@ pub(super) fn resolve_memory_layout( memory_layout, pcie_root_complex_ranges, virtio_mmio_region, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio: ChipsetMmioRanges { + low: chipset_low_mmio, + high: chipset_high_mmio, + vtl2: vtl2_chipset_mmio, + }, + vtl2_framebuffer_gpa_base: if vtl2_framebuffer_range.is_empty() { + None + } else { + Some(vtl2_framebuffer_range.start()) + }, smmu_ranges, }) } @@ -542,6 +576,7 @@ mod tests { virtio_mmio_count: 0, smmu_count: 0, vtl2_layout, + vtl2_framebuffer_size: 0, physical_address_size: 46, } } @@ -619,8 +654,8 @@ mod tests { fn chipset_mmio_is_resolved() { let result = resolve_memory_layout(input(2 * GB, None, None)).unwrap(); - let low = result.chipset_low_mmio; - let high = result.chipset_high_mmio; + let low = result.chipset_mmio.low; + let high = result.chipset_mmio.high; assert_eq!(low.len(), DEFAULT_CHIPSET_LOW_MMIO_SIZE as u64); assert_eq!(high.len(), DEFAULT_CHIPSET_HIGH_MMIO_SIZE); // Chipset low MMIO is pinned to end at 4 GiB and must fully contain @@ -841,10 +876,10 @@ mod tests { let result = resolve_memory_layout(config).unwrap(); - let vtl2_mmio = result.vtl2_chipset_mmio; + let vtl2_mmio = result.chipset_mmio.vtl2; assert_eq!(vtl2_mmio.len(), DEFAULT_VTL2_CHIPSET_MMIO_SIZE); // VTL2 chipset MMIO should be after all VTL0-visible ranges. - let chipset_high = result.chipset_high_mmio; + let chipset_high = result.chipset_mmio.high; assert!( vtl2_mmio.start() >= chipset_high.end(), "VTL2 chipset MMIO should be after VTL0 high MMIO" @@ -874,10 +909,10 @@ mod tests { let result = resolve_memory_layout(config).unwrap(); - let low = result.chipset_low_mmio; + let low = result.chipset_mmio.low; assert_eq!(low.end(), 4 * GB); assert!(low.contains(&ARCH_RESERVED)); - assert!(result.chipset_high_mmio.is_empty()); + assert!(result.chipset_mmio.high.is_empty()); // The reported ranges must appear in MemoryLayout::mmio() preserving // the positional contract: [0] = low, [1] = high (EMPTY placeholder). assert_eq!(result.memory_layout.mmio(), &[low, MemoryRange::EMPTY]); @@ -890,16 +925,16 @@ mod tests { let mut config = input(2 * GB, None, None); config.layout.chipset_high_mmio_size = 0; let result = resolve_memory_layout(config).unwrap(); - assert!(!result.chipset_low_mmio.is_empty()); - assert!(result.chipset_high_mmio.is_empty()); + assert!(!result.chipset_mmio.low.is_empty()); + assert!(result.chipset_mmio.high.is_empty()); let mut config = input(2 * GB, None, None); config.layout.chipset_low_mmio_size = 0; let result = resolve_memory_layout(config).unwrap(); // Low is always at least the arch reserved zone. - assert!(!result.chipset_low_mmio.is_empty()); + assert!(!result.chipset_mmio.low.is_empty()); // High is still configured in this case. - assert!(!result.chipset_high_mmio.is_empty()); + assert!(!result.chipset_mmio.high.is_empty()); } #[test] diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs b/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs index 9305c8dd0f..28b9e9be53 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs @@ -3,6 +3,7 @@ //! Loader implementation to load IGVM files. +use super::super::memory_layout::ChipsetMmioRanges; use guestmem::GuestMemory; use hvdef::HV_PAGE_SIZE; use igvm::IgvmDirectiveHeader; @@ -263,9 +264,7 @@ struct BuildDeviceTreeParams<'a> { with_vmbus_redirect: bool, com_serial: Option, entropy: Option<&'a [u8]>, - chipset_low_mmio: MemoryRange, - chipset_high_mmio: MemoryRange, - vtl2_chipset_mmio: MemoryRange, + chipset_mmio: ChipsetMmioRanges, } /// Build a device tree representing the whole guest partition. @@ -279,11 +278,15 @@ fn build_device_tree(params: BuildDeviceTreeParams<'_>) -> Result, fdt:: with_vmbus_redirect, com_serial, entropy, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, } = params; + let ChipsetMmioRanges { + low: chipset_low_mmio, + high: chipset_high_mmio, + vtl2: vtl2_chipset_mmio, + } = chipset_mmio; + let mut buf = vec![0; HV_PAGE_SIZE as usize * 256]; let mut builder = fdt::builder::Builder::new(fdt::builder::BuilderConfig { @@ -523,12 +526,8 @@ pub struct LoadIgvmParams<'a, T: ArchTopology> { pub com_serial: Option, /// Entropy pub entropy: Option<&'a [u8]>, - /// VTL0 chipset low MMIO range for the device tree VMBus node. - pub chipset_low_mmio: MemoryRange, - /// VTL0 chipset high MMIO range for the device tree VMBus node. - pub chipset_high_mmio: MemoryRange, - /// VTL2-private chipset MMIO range for the device tree VTL2 VMBus node. - pub vtl2_chipset_mmio: MemoryRange, + /// Resolved chipset MMIO ranges for device tree and UEFI config. + pub chipset_mmio: ChipsetMmioRanges, } pub fn load_igvm( @@ -571,11 +570,15 @@ fn load_igvm_x86( with_vmbus_redirect, com_serial, entropy, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, } = params; + let ChipsetMmioRanges { + low: chipset_low_mmio, + high: chipset_high_mmio, + .. + } = chipset_mmio; + let relocations_enabled = match vtl2_base_address { Vtl2BaseAddressType::File | Vtl2BaseAddressType::Vtl2Allocate { .. } => false, Vtl2BaseAddressType::Absolute(_) | Vtl2BaseAddressType::MemoryLayout { .. } => true, @@ -985,9 +988,7 @@ fn load_igvm_x86( with_vmbus_redirect, com_serial, entropy, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, }) .map_err(Error::DeviceTree)?; import_parameter(&mut parameter_areas, info, &dt)?; diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs index 22c0600443..3fbcc57cc1 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use crate::worker::memory_layout::ChipsetMmioRanges; use guestmem::GuestMemory; use loader::importer::Aarch64Register; use loader::importer::X86Register; @@ -10,6 +11,7 @@ use loader::linux::InitrdAddressType; use loader::linux::InitrdConfig; use loader::linux::RegisterConfig; use loader::linux::ZeroPageConfig; +use memory_range::MemoryRange; use std::ffi::CString; use std::io::Seek; use thiserror::Error; @@ -148,6 +150,8 @@ fn build_dt( processor_topology: &ProcessorTopology, pcie_host_bridges: &[PcieHostBridge], smmu_configs: &[vmm_core::acpi_builder::AcpiSmmuConfig], + chipset_low_mmio: MemoryRange, + chipset_high_mmio: MemoryRange, initrd_start: u64, initrd_end: u64, ) -> Result, fdt::builder::Error> { @@ -540,13 +544,7 @@ fn build_dt( } } - // Build VMBus MMIO ranges from the memory layout's chipset MMIO gaps. - assert!( - cfg.mem_layout.mmio().len() >= 2, - "need at least two MMIO regions for VMBus DT node" - ); - let low_mmio_gap = cfg.mem_layout.mmio()[0]; - let high_mmio_gap = cfg.mem_layout.mmio()[1]; + // Build VMBus MMIO ranges from the chipset MMIO ranges. soc = soc .start_node("vmbus")? .add_u32(p_address_cells, 2)? @@ -555,10 +553,10 @@ fn build_dt( .add_u64_array( p_ranges, &[ - low_mmio_gap.start(), - low_mmio_gap.len(), - high_mmio_gap.start(), - high_mmio_gap.len(), + chipset_low_mmio.start(), + chipset_low_mmio.len(), + chipset_high_mmio.start(), + chipset_high_mmio.len(), ], )? .add_str(p_compatible, "microsoft,vmbus")? @@ -835,6 +833,7 @@ pub fn load_linux_arm64( processor_topology: &ProcessorTopology, pcie_host_bridges: &[PcieHostBridge], smmu_configs: &[vmm_core::acpi_builder::AcpiSmmuConfig], + chipset_mmio: &ChipsetMmioRanges, build_acpi: Option vmm_core::acpi_builder::BuiltAcpiTables>, ) -> Result, Error> { let mut loader = Loader::new(gm.clone(), cfg.mem_layout, hvdef::Vtl::Vtl0); @@ -896,6 +895,8 @@ pub fn load_linux_arm64( processor_topology, pcie_host_bridges, smmu_configs, + chipset_mmio.low, + chipset_mmio.high, initrd_start, initrd_end, ) diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs b/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs index 1865c64ad4..0880e84329 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use crate::worker::memory_layout::ChipsetMmioRanges; use guestmem::GuestMemory; use guid::Guid; use hvdef::HV_PAGE_SIZE; @@ -23,8 +24,6 @@ pub enum Error { Firmware(#[source] std::io::Error), #[error("uefi loader error")] Loader(#[source] loader::uefi::Error), - #[error("UEFI requires at least two MMIO ranges")] - UnsupportedMmio, #[error("failed to build PCIe ACPI tables")] PcieAcpi(#[source] vmm_core::acpi_builder::PcieAcpiBuildError), #[cfg(guest_arch = "aarch64")] @@ -56,15 +55,12 @@ pub fn load_uefi( mem_layout: &MemoryLayout, pcie_host_bridges: &[PcieHostBridge], load_settings: UefiLoadSettings, + chipset_mmio: &ChipsetMmioRanges, madt: &[u8], srat: &[u8], mcfg: Option<&[u8]>, pptt: Option<&[u8]>, ) -> Result, Error> { - if mem_layout.mmio().len() < 2 { - return Err(Error::UnsupportedMmio); - } - let mut loaded_image; let image = { loaded_image = Vec::new(); @@ -89,9 +85,6 @@ pub fn load_uefi( }) .collect(); - let low_mmio = mem_layout.mmio()[0]; - let high_mmio = mem_layout.mmio()[1]; - let flags = config::Flags::new() .with_hibernate_enabled(true) .with_serial_controllers_enabled(load_settings.serial) @@ -136,12 +129,12 @@ pub fn load_uefi( .add(&config::Entropy(entropy)) .add(&config::MmioRanges([ config::Mmio { - mmio_page_number_start: low_mmio.start() / HV_PAGE_SIZE, - mmio_size_in_pages: (low_mmio.end() - low_mmio.start()) / HV_PAGE_SIZE, + mmio_page_number_start: chipset_mmio.low.start() / HV_PAGE_SIZE, + mmio_size_in_pages: chipset_mmio.low.len() / HV_PAGE_SIZE, }, config::Mmio { - mmio_page_number_start: high_mmio.start() / HV_PAGE_SIZE, - mmio_size_in_pages: (high_mmio.end() - high_mmio.start()) / HV_PAGE_SIZE, + mmio_page_number_start: chipset_mmio.high.start() / HV_PAGE_SIZE, + mmio_size_in_pages: chipset_mmio.high.len() / HV_PAGE_SIZE, }, ])) .add(&config::ProcessorInformation { diff --git a/vm/acpi/src/dsdt.rs b/vm/acpi/src/dsdt.rs index e3584dda3b..88f215e534 100644 --- a/vm/acpi/src/dsdt.rs +++ b/vm/acpi/src/dsdt.rs @@ -233,8 +233,8 @@ impl Dsdt { vmod.add_object(&NamedString::new(b"_HID", b"ACPI0004")); vmod.add_object(&NamedInteger::new(b"_UID", 0)); let mut vmod_crs = CurrentResourceSettings::new(); - vmod_crs.add_resource(&QwordMemory::new(low.start(), low.end() - low.start())); - vmod_crs.add_resource(&QwordMemory::new(high.start(), high.end() - high.start())); + vmod_crs.add_resource(&QwordMemory::new(low.start(), low.len())); + vmod_crs.add_resource(&QwordMemory::new(high.start(), high.len())); vmod.add_object(&vmod_crs); self.add_object(&vmod); } @@ -292,8 +292,8 @@ impl Dsdt { let mut crs = CurrentResourceSettings::new(); crs.add_resource(&BusNumber::new(0, 1)); crs.add_resource(&IoPort::new(0xcf8, 0xcf8, 8)); - crs.add_resource(&QwordMemory::new(low.start(), low.end() - low.start())); - crs.add_resource(&QwordMemory::new(high.start(), high.end() - high.start())); + crs.add_resource(&QwordMemory::new(low.start(), low.len())); + crs.add_resource(&QwordMemory::new(high.start(), high.len())); pci0.add_object(&crs); self.add_object(&pci0); } diff --git a/vm/devices/firmware/firmware_pcat/Cargo.toml b/vm/devices/firmware/firmware_pcat/Cargo.toml index 57f48d0390..91aca142ed 100644 --- a/vm/devices/firmware/firmware_pcat/Cargo.toml +++ b/vm/devices/firmware/firmware_pcat/Cargo.toml @@ -11,6 +11,7 @@ generation_id.workspace = true chipset_device.workspace = true guestmem.workspace = true +memory_range.workspace = true vmcore.workspace = true vm_topology = { workspace = true, features = ["inspect"] } @@ -26,8 +27,5 @@ tracelimit.workspace = true tracing.workspace = true zerocopy.workspace = true -[dev-dependencies] -memory_range.workspace = true - [lints] workspace = true diff --git a/vm/devices/firmware/firmware_pcat/src/lib.rs b/vm/devices/firmware/firmware_pcat/src/lib.rs index 06e37fa0b9..6c23bcac46 100644 --- a/vm/devices/firmware/firmware_pcat/src/lib.rs +++ b/vm/devices/firmware/firmware_pcat/src/lib.rs @@ -51,6 +51,7 @@ use zerocopy::IntoBytes; pub mod config { use guid::Guid; use inspect::Inspect; + use memory_range::MemoryRange; use vm_topology::memory::MemoryLayout; use vm_topology::processor::ProcessorTopology; use vm_topology::processor::x86::X86Topology; @@ -118,6 +119,10 @@ pub mod config { pub processor_topology: ProcessorTopology, /// The VM's memory layout pub mem_layout: MemoryLayout, + /// Chipset low MMIO range (below 4 GB). + pub chipset_low_mmio: MemoryRange, + /// Chipset high MMIO range (above RAM). + pub chipset_high_mmio: MemoryRange, /// The SRAT ACPI table reflected into the guest pub srat: Vec, /// Initial [Generation Id](generation_id) value @@ -243,8 +248,8 @@ const POST_IO_PORT: u16 = 0x80; #[derive(Debug, Error)] #[expect(missing_docs)] // self-explanatory variants pub enum PcatBiosDeviceInitError { - #[error("expected exactly 2 mmio holes, found {0}")] - IncorrectMmioHoles(usize), + #[error("PCAT requires non-empty chipset low and high MMIO ranges")] + IncorrectMmioHoles, #[error("invalid ROM size {0:x} bytes, expected 256KB")] InvalidRomSize(u64), #[error("error mapping ROM")] @@ -269,10 +274,8 @@ impl PcatBiosDevice { let initial_generation_id = config.initial_generation_id; - if config.mem_layout.mmio().len() != 2 { - return Err(PcatBiosDeviceInitError::IncorrectMmioHoles( - config.mem_layout.mmio().len(), - )); + if config.chipset_low_mmio.is_empty() || config.chipset_high_mmio.is_empty() { + return Err(PcatBiosDeviceInitError::IncorrectMmioHoles); } let mut rom_mems = Vec::new(); @@ -402,7 +405,7 @@ impl PcatBiosDevice { } } PcatAddress::PCI_IO_GAP_START => { - self.config.mem_layout.mmio()[0].start().try_into().unwrap() + self.config.chipset_low_mmio.start().try_into().unwrap() } PcatAddress::PROCESSOR_STA_ENABLE => { // Read by the ACPI _STA (status) method in the Processor @@ -432,7 +435,7 @@ impl PcatBiosDevice { // Consumers: // - vmbios/source/bsp/em/smbios/Smbport.asm, // - core/src/MEM.ASM. - self.config.mem_layout.mmio()[1].start().to_mb() + self.config.chipset_high_mmio.start().to_mb() } PcatAddress::HIGH_MMIO_GAP_LENGTH_IN_MB => { // Consumers: @@ -444,7 +447,7 @@ impl PcatBiosDevice { // this code was written in Hyper-V, the `end - start` // calculation used an _inclusive_ `start..=end` range from the // MMIO gaps API, which wasn't properly compensated for here. - self.config.mem_layout.mmio()[1].len().to_mb() - 1 + self.config.chipset_high_mmio.len().to_mb() - 1 } PcatAddress::E820_ENTRY => handle_int15_e820_query( &self.config.mem_layout, diff --git a/vm/loader/src/common.rs b/vm/loader/src/common.rs index 8fa00ecfac..f301e41abd 100644 --- a/vm/loader/src/common.rs +++ b/vm/loader/src/common.rs @@ -10,6 +10,7 @@ use crate::importer::SegmentRegister; use crate::importer::TableRegister; use crate::importer::X86Register; use hvdef::HV_PAGE_SIZE; +use memory_range::MemoryRange; use std::io::Read; use std::io::Seek; use thiserror::Error; @@ -99,24 +100,17 @@ pub fn import_default_gdt( Ok(()) } -/// Returned when the MMIO layout is not supported. -#[derive(Debug, Error)] -#[error("exactly two MMIO gaps are required")] -pub struct UnsupportedMmio; - /// Computes the x86 variable MTRRs that describe the given memory layout. This /// is intended to be used to setup MTRRs for booting a guest with two mmio /// gaps, such as booting Linux, UEFI, or PCAT. -/// -/// N.B. Currently this panics if there are not exactly two MMIO ranges. pub fn compute_variable_mtrrs( memory: &MemoryLayout, physical_address_width: u8, -) -> Result, UnsupportedMmio> { + chipset_low_mmio: MemoryRange, + chipset_high_mmio: MemoryRange, +) -> Vec { const WRITEBACK: u64 = 0x6; - let &[mmio_gap_low, mmio_gap_high] = memory.mmio().try_into().map_err(|_| UnsupportedMmio)?; - // Clamp the width to something reasonable. let gpa_space_size = physical_address_width.clamp(36, 52); @@ -141,17 +135,19 @@ pub fn compute_variable_mtrrs( result.push(X86Register::MtrrPhysBase1(pcat_mtrr_size | WRITEBACK)); result.push(X86Register::MtrrPhysMask1(mtrr_mask( gpa_space_size, - mmio_gap_low.start() - 1, + chipset_low_mmio.start() - 1, ))); } // If there is more than ~3.8GB of memory, use MTRR 204 and MTRR Mask 205 to cover // the amount of memory above 4GB. - if memory.end_of_ram() > mmio_gap_low.end() { - result.push(X86Register::MtrrPhysBase2(mmio_gap_low.end() | WRITEBACK)); + if memory.end_of_ram() > chipset_low_mmio.end() { + result.push(X86Register::MtrrPhysBase2( + chipset_low_mmio.end() | WRITEBACK, + )); result.push(X86Register::MtrrPhysMask2(mtrr_mask( gpa_space_size, - mmio_gap_high.start() - 1, + chipset_high_mmio.start() - 1, ))); } @@ -159,8 +155,10 @@ pub fn compute_variable_mtrrs( // MTRR 208 and MTRR Mask 209 depending on maximum address width. Both MTRR pairs are // used with the magic 8TB boundary to work around a bug in older Linux kernels // (e.g. RHEL 6.x, etc.) - if memory.end_of_ram() > mmio_gap_high.end() { - result.push(X86Register::MtrrPhysBase3(mmio_gap_high.end() | WRITEBACK)); + if memory.end_of_ram() > chipset_high_mmio.end() { + result.push(X86Register::MtrrPhysBase3( + chipset_high_mmio.end() | WRITEBACK, + )); result.push(X86Register::MtrrPhysMask3(mtrr_mask( gpa_space_size, (1 << std::cmp::min(gpa_space_size, 43)) - 1, @@ -174,7 +172,7 @@ pub fn compute_variable_mtrrs( } } - Ok(result) + result } fn mtrr_mask(gpa_space_size: u8, maximum_address: u64) -> u64 { diff --git a/vmm_core/src/acpi_builder.rs b/vmm_core/src/acpi_builder.rs index cc1c2642cb..5d4dbad5a8 100644 --- a/vmm_core/src/acpi_builder.rs +++ b/vmm_core/src/acpi_builder.rs @@ -754,7 +754,7 @@ impl AcpiTablesBuilder<'_, T> { /// Returns tables that should be loaded at the supplied gpa. pub fn build_acpi_tables(&self, gpa: u64, add_devices_to_dsdt: F) -> BuiltAcpiTables where - F: FnOnce(&MemoryLayout, &mut dsdt::Dsdt), + F: FnOnce(&mut dsdt::Dsdt), { let mut dsdt_data = dsdt::Dsdt::new(); // Name(\_S0, Package(2){0, 0}) @@ -768,7 +768,7 @@ impl AcpiTablesBuilder<'_, T> { &dsdt::Package(vec![0, 0]), )); // Add any chipset devices. - add_devices_to_dsdt(self.mem_layout, &mut dsdt_data); + add_devices_to_dsdt(&mut dsdt_data); // Add processor devices: // Device(P###) { Name(_HID, "ACPI0007") Name(_UID, #) Method(_STA, 0) { Return(0xF) } } for proc_index in 1..self.processor_topology.vp_count() + 1 { @@ -1306,7 +1306,7 @@ mod test { let builder = new_builder(&mem, &topology, &pcie_host_bridges); assert!(builder.build_iort().is_none()); - let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + let tables = builder.build_acpi_tables(0x100000, |_| {}); assert!(!contains_signature(&tables.tables, b"IORT")); } @@ -1335,7 +1335,7 @@ mod test { }]; let builder = new_aarch64_builder(&mem, &topology, &pcie_host_bridges); - let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + let tables = builder.build_acpi_tables(0x100000, |_| {}); assert!(contains_signature(&tables.tables, b"MCFG")); assert!(contains_signature(&tables.tables, b"IORT")); } @@ -1393,7 +1393,7 @@ mod test { }]; let builder = new_builder(&mem, &topology, &pcie_host_bridges); - let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + let tables = builder.build_acpi_tables(0x100000, |_| {}); assert!(contains_signature(&tables.tables, b"CEDT")); } From 928797d61b4e9453cfbb72aecb505907d9de31b3 Mon Sep 17 00:00:00 2001 From: John Starks Date: Fri, 22 May 2026 00:02:56 +0000 Subject: [PATCH 2/4] tweaks --- openvmm/openvmm_core/src/worker/dispatch.rs | 20 ++++++++++----- vm/loader/src/common.rs | 27 +++++++++++++++++---- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index 4a13b3e9c3..200c1e2904 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -2816,7 +2816,12 @@ impl LoadedVmInner { let build_acpi = if boot_mode == LinuxDirectBootMode::Acpi { Some(|rsdp_gpa: u64| { acpi_builder.build_acpi_tables(rsdp_gpa, |dsdt| { - add_devices_to_dsdt_arm64(dsdt, enable_serial, &self.chipset_mmio) + add_devices_to_dsdt_arm64( + dsdt, + enable_serial, + &self.chipset_mmio, + self.hypervisor_cfg.with_hv, + ) }) }) } else { @@ -3596,6 +3601,7 @@ fn add_devices_to_dsdt_arm64( dsdt: &mut dsdt::Dsdt, enable_serial: bool, chipset_mmio: &ChipsetMmioRanges, + with_hv: bool, ) { // VMBus GIC INTID (PPI 2 = INTID 16 + 2 = 18), matching the DT path. const VMBUS_INTID: u32 = openvmm_defs::config::DEFAULT_VMBUS_PPI; @@ -3607,12 +3613,14 @@ fn add_devices_to_dsdt_arm64( const PL011_SERIAL0_GSIV: u32 = 33; const PL011_SERIAL1_GSIV: u32 = 34; - dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); + if with_hv { + dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); - // VMBus on ARM64 ACPI needs a per-CPU interrupt (PPI) in _CRS. - // Always place under VMOD, not PCI0 — ARM64 doesn't use the x86 - // PCI0 DSDT node. - dsdt.add_vmbus(false, Some(VMBUS_INTID)); + // VMBus on ARM64 ACPI needs a per-CPU interrupt (PPI) in _CRS. + // Always place under VMOD, not PCI0 — ARM64 doesn't use the x86 + // PCI0 DSDT node. + dsdt.add_vmbus(false, Some(VMBUS_INTID)); + } if enable_serial { dsdt.add_sbsa_uart( diff --git a/vm/loader/src/common.rs b/vm/loader/src/common.rs index f301e41abd..18ae9a86ae 100644 --- a/vm/loader/src/common.rs +++ b/vm/loader/src/common.rs @@ -145,17 +145,34 @@ pub fn compute_variable_mtrrs( result.push(X86Register::MtrrPhysBase2( chipset_low_mmio.end() | WRITEBACK, )); - result.push(X86Register::MtrrPhysMask2(mtrr_mask( - gpa_space_size, - chipset_high_mmio.start() - 1, - ))); + if chipset_high_mmio.is_empty() { + // No high MMIO gap — RAM above the low gap is contiguous. + // Cover to the 8TB boundary (or GPA space limit) using the + // same split as the >64GB path below. + result.push(X86Register::MtrrPhysMask2(mtrr_mask( + gpa_space_size, + (1 << std::cmp::min(gpa_space_size, 43)) - 1, + ))); + if gpa_space_size > 43 { + result.push(X86Register::MtrrPhysBase3((1 << 43) | WRITEBACK)); + result.push(X86Register::MtrrPhysMask3(mtrr_mask( + gpa_space_size, + (1 << gpa_space_size) - 1, + ))); + } + } else { + result.push(X86Register::MtrrPhysMask2(mtrr_mask( + gpa_space_size, + chipset_high_mmio.start() - 1, + ))); + } } // If there is more memory than 64GB then use MTRR 206 and MTRR Mask 207 and possibly // MTRR 208 and MTRR Mask 209 depending on maximum address width. Both MTRR pairs are // used with the magic 8TB boundary to work around a bug in older Linux kernels // (e.g. RHEL 6.x, etc.) - if memory.end_of_ram() > chipset_high_mmio.end() { + if !chipset_high_mmio.is_empty() && memory.end_of_ram() > chipset_high_mmio.end() { result.push(X86Register::MtrrPhysBase3( chipset_high_mmio.end() | WRITEBACK, )); From 505c089d569f4e62a4c8b3a471c9bff43b600fd6 Mon Sep 17 00:00:00 2001 From: John Starks Date: Sat, 23 May 2026 13:29:08 -0700 Subject: [PATCH 3/4] feedback --- .../openvmm_core/src/worker/memory_layout.rs | 38 +++++ vm/acpi/src/dsdt.rs | 12 +- vm/loader/src/common.rs | 138 ++++++++++++++++++ 3 files changed, 185 insertions(+), 3 deletions(-) diff --git a/openvmm/openvmm_core/src/worker/memory_layout.rs b/openvmm/openvmm_core/src/worker/memory_layout.rs index c0b68d40f5..300540320e 100644 --- a/openvmm/openvmm_core/src/worker/memory_layout.rs +++ b/openvmm/openvmm_core/src/worker/memory_layout.rs @@ -980,4 +980,42 @@ mod tests { assert!(err.to_string().contains("ECAM"), "unexpected error: {err}"); } + + #[test] + fn vtl2_framebuffer_allocation() { + let framebuffer_size = 8 * MB; + let mut config = input(2 * GB, None, None); + config.vtl2_framebuffer_size = framebuffer_size; + let result = resolve_memory_layout(config).unwrap(); + + let gpa_base = result + .vtl2_framebuffer_gpa_base + .expect("framebuffer GPA should be allocated"); + // Must be page-aligned. + assert_eq!(gpa_base % PAGE_SIZE, 0, "framebuffer GPA not page-aligned"); + let fb_range = MemoryRange::new(gpa_base..gpa_base + framebuffer_size); + // Must not overlap RAM. + for ram in result.memory_layout.ram() { + assert!( + !fb_range.overlaps(&ram.range), + "framebuffer {fb_range} overlaps RAM {}", + ram.range, + ); + } + // Must not overlap chipset MMIO. + for mmio in result.memory_layout.mmio() { + assert!( + !fb_range.overlaps(mmio), + "framebuffer {fb_range} overlaps MMIO {mmio}", + ); + } + } + + #[test] + fn vtl2_framebuffer_zero_size_returns_none() { + let mut config = input(2 * GB, None, None); + config.vtl2_framebuffer_size = 0; + let result = resolve_memory_layout(config).unwrap(); + assert!(result.vtl2_framebuffer_gpa_base.is_none()); + } } diff --git a/vm/acpi/src/dsdt.rs b/vm/acpi/src/dsdt.rs index 88f215e534..5081302cb8 100644 --- a/vm/acpi/src/dsdt.rs +++ b/vm/acpi/src/dsdt.rs @@ -233,8 +233,12 @@ impl Dsdt { vmod.add_object(&NamedString::new(b"_HID", b"ACPI0004")); vmod.add_object(&NamedInteger::new(b"_UID", 0)); let mut vmod_crs = CurrentResourceSettings::new(); - vmod_crs.add_resource(&QwordMemory::new(low.start(), low.len())); - vmod_crs.add_resource(&QwordMemory::new(high.start(), high.len())); + if !low.is_empty() { + vmod_crs.add_resource(&QwordMemory::new(low.start(), low.len())); + } + if !high.is_empty() { + vmod_crs.add_resource(&QwordMemory::new(high.start(), high.len())); + } vmod.add_object(&vmod_crs); self.add_object(&vmod); } @@ -293,7 +297,9 @@ impl Dsdt { crs.add_resource(&BusNumber::new(0, 1)); crs.add_resource(&IoPort::new(0xcf8, 0xcf8, 8)); crs.add_resource(&QwordMemory::new(low.start(), low.len())); - crs.add_resource(&QwordMemory::new(high.start(), high.len())); + if !high.is_empty() { + crs.add_resource(&QwordMemory::new(high.start(), high.len())); + } pci0.add_object(&crs); self.add_object(&pci0); } diff --git a/vm/loader/src/common.rs b/vm/loader/src/common.rs index 18ae9a86ae..47486aaea0 100644 --- a/vm/loader/src/common.rs +++ b/vm/loader/src/common.rs @@ -408,3 +408,141 @@ impl ChunkBuf { Ok(hasher.finalize()) } } + +#[cfg(test)] +mod tests { + use super::*; + + const GB: u64 = 1024 * 1024 * 1024; + const MB: u64 = 1024 * 1024; + + fn standard_low_mmio() -> MemoryRange { + // 128 MB gap below 4 GiB, matching the typical x86_64 config. + MemoryRange::new(4 * GB - 128 * MB..4 * GB) + } + + fn standard_high_mmio() -> MemoryRange { + // 512 MB immediately above 4 GiB. + MemoryRange::new(4 * GB..4 * GB + 512 * MB) + } + + fn make_layout(ram_size: u64) -> MemoryLayout { + MemoryLayout::new( + ram_size, + &[standard_low_mmio(), standard_high_mmio()], + &[], + &[], + None, + ) + .unwrap() + } + + /// Count MTRR base/mask pairs in the register list. + fn pair_count(regs: &[X86Register]) -> usize { + assert_eq!(regs.len() % 2, 0, "MTRR registers come in pairs"); + regs.len() / 2 + } + + #[test] + fn mtrr_128mb_exactly() { + // 128 MB = pcat_mtrr_size → only pair 0 (base covers 128 MB) + let layout = make_layout(128 * MB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + assert_eq!(pair_count(®s), 1); + assert_eq!(regs[0], X86Register::MtrrPhysBase0(0x6)); + } + + #[test] + fn mtrr_256mb() { + // 256 MB: pair 0 (128 MB) + pair 1 (128 MB..low_gap_start) + let layout = make_layout(256 * MB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + assert_eq!(pair_count(®s), 2); + assert_eq!(regs[0], X86Register::MtrrPhysBase0(0x6)); + // Pair 1 base = 128 MB | WRITEBACK + assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | 0x6)); + } + + #[test] + fn mtrr_2gb() { + // 2 GB: same structure as 256 MB (RAM is below 4 GiB) + let layout = make_layout(2 * GB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + assert_eq!(pair_count(®s), 2); + assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | 0x6)); + } + + #[test] + fn mtrr_8gb() { + // 8 GB: RAM above 4 GiB and above high MMIO end → pairs 0–4 + let layout = make_layout(8 * GB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + // Pair 0: 0..128 MB + // Pair 1: 128 MB..low gap start + // Pair 2: low gap end (4 GiB)..high gap start (4 GiB) — effectively empty but still emitted + // Pair 3: high gap end..8 TB boundary + // Pair 4: 8 TB.. (gpa_space_size > 43) + assert_eq!(pair_count(®s), 5); + + // Verify key base addresses + assert_eq!(regs[0], X86Register::MtrrPhysBase0(0x6)); + assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | 0x6)); + assert_eq!( + regs[4], + X86Register::MtrrPhysBase2(standard_low_mmio().end() | 0x6) + ); + assert_eq!( + regs[6], + X86Register::MtrrPhysBase3(standard_high_mmio().end() | 0x6) + ); + assert_eq!(regs[8], X86Register::MtrrPhysBase4((1u64 << 43) | 0x6)); + } + + #[test] + fn mtrr_8gb_no_high_mmio() { + // New behavior: no high MMIO gap. RAM above 4 GiB is contiguous. + // Uses the is_empty() branch. + let low = standard_low_mmio(); + let layout = MemoryLayout::new(8 * GB, &[low], &[], &[], None).unwrap(); + let regs = compute_variable_mtrrs(&layout, 46, low, MemoryRange::EMPTY); + // Pair 0: 0..128 MB + // Pair 1: 128 MB..low gap start + // Pair 2: low gap end..8 TB boundary (is_empty branch) + // Pair 3: 8 TB..GPA limit (gpa_space_size > 43) + assert_eq!(pair_count(®s), 4); + assert_eq!(regs[4], X86Register::MtrrPhysBase2(low.end() | 0x6)); + assert_eq!(regs[6], X86Register::MtrrPhysBase3((1u64 << 43) | 0x6)); + } + + #[test] + fn mtrr_narrow_address_width() { + // 40-bit address width (< 43): the 8 TB split is not needed. + let layout = make_layout(8 * GB); + let regs = compute_variable_mtrrs(&layout, 40, standard_low_mmio(), standard_high_mmio()); + // No pair 4 because gpa_space_size <= 43 + assert_eq!(pair_count(®s), 4); + assert_eq!( + regs[6], + X86Register::MtrrPhysBase3(standard_high_mmio().end() | 0x6) + ); + } + + #[test] + fn mtrr_masks_have_enabled_bit() { + // Every mask register should have the ENABLED bit (bit 11) set. + let layout = make_layout(8 * GB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + for (i, reg) in regs.iter().enumerate() { + match reg { + X86Register::MtrrPhysMask0(v) + | X86Register::MtrrPhysMask1(v) + | X86Register::MtrrPhysMask2(v) + | X86Register::MtrrPhysMask3(v) + | X86Register::MtrrPhysMask4(v) => { + assert!(v & (1 << 11) != 0, "mask at index {i} missing ENABLED bit"); + } + _ => {} // base registers + } + } + } +} From 6da9cdb92c3bbd6f9cae22f6c67c8d55575d94b9 Mon Sep 17 00:00:00 2001 From: John Starks Date: Sat, 23 May 2026 19:20:16 -0700 Subject: [PATCH 4/4] cleanup --- vm/devices/firmware/firmware_pcat/src/lib.rs | 25 +++++++++++--- vm/vmcore/vm_topology/src/memory.rs | 35 -------------------- 2 files changed, 21 insertions(+), 39 deletions(-) diff --git a/vm/devices/firmware/firmware_pcat/src/lib.rs b/vm/devices/firmware/firmware_pcat/src/lib.rs index 6c23bcac46..d157079327 100644 --- a/vm/devices/firmware/firmware_pcat/src/lib.rs +++ b/vm/devices/firmware/firmware_pcat/src/lib.rs @@ -387,7 +387,14 @@ impl PcatBiosDevice { // Consumers: // - vmbios/source/bsp/em/smbios/Smbport.asm, // - core/src/MEM.ASM. - self.config.mem_layout.ram_above_4gb().to_mb() + self.config + .mem_layout + .ram() + .iter() + .filter(|r| r.range.end() >= 0x1_0000_0000) + .map(|r| r.range.len()) + .sum::() + .to_mb() } PcatAddress::SLEEP_STATES => { // The AMI BIOS wants to read a byte value of flags to determine @@ -427,8 +434,11 @@ impl PcatBiosDevice { // - core/src/MEM.ASM. self.config .mem_layout - .ram_above_high_mmio() - .expect("validated exactly 2 mmio ranges") + .ram() + .iter() + .filter(|r| r.range.start() >= self.config.chipset_high_mmio.end()) + .map(|r| r.range.len()) + .sum::() .to_mb() } PcatAddress::HIGH_MMIO_GAP_BASE_IN_MB => { @@ -456,7 +466,14 @@ impl PcatBiosDevice { ), PcatAddress::INITIAL_MEGABYTES_BELOW_GAP => { // Consumers: vmbios/source/bsp/em/smbios/smbios/Smbport.asm - self.config.mem_layout.ram_below_4gb().to_mb() + self.config + .mem_layout + .ram() + .iter() + .filter(|r| r.range.end() < 0x1_0000_0000) + .map(|r| r.range.len()) + .sum::() + .to_mb() } _ => { tracelimit::warn_ratelimited!(?addr, "unknown bios read"); diff --git a/vm/vmcore/vm_topology/src/memory.rs b/vm/vmcore/vm_topology/src/memory.rs index bf0fe05ea6..04634d7ec8 100644 --- a/vm/vmcore/vm_topology/src/memory.rs +++ b/vm/vmcore/vm_topology/src/memory.rs @@ -381,41 +381,6 @@ impl MemoryLayout { self.ram.last().expect("mmio set").range.end() } - /// The bytes of RAM below 4GB. - pub fn ram_below_4gb(&self) -> u64 { - self.ram - .iter() - .filter(|r| r.range.end() < FOUR_GB) - .map(|r| r.range.len()) - .sum() - } - - /// The bytes of RAM at or above 4GB. - pub fn ram_above_4gb(&self) -> u64 { - self.ram - .iter() - .filter(|r| r.range.end() >= FOUR_GB) - .map(|r| r.range.len()) - .sum() - } - - /// The bytes of RAM above the high MMIO gap. - /// - /// Returns None if there aren't exactly 2 MMIO gaps. - pub fn ram_above_high_mmio(&self) -> Option { - if self.mmio.len() != 2 { - return None; - } - - Some( - self.ram - .iter() - .filter(|r| r.range.start() >= self.mmio[1].end()) - .map(|r| r.range.len()) - .sum(), - ) - } - /// The ending RAM address below 4GB. /// /// Returns None if there is no RAM mapped below 4GB.