diff --git a/openhcl/underhill_core/src/loader/mod.rs b/openhcl/underhill_core/src/loader/mod.rs index f737834ecc..03fe60f258 100644 --- a/openhcl/underhill_core/src/loader/mod.rs +++ b/openhcl/underhill_core/src/loader/mod.rs @@ -5,6 +5,7 @@ use self::vtl2_config::RuntimeParameters; use crate::loader::vtl0_config::LinuxInfo; +use crate::worker::ChipsetMmioRanges; use crate::worker::FirmwareType; use cvm_tracing::CVM_ALLOWED; use guest_emulation_transport::api::platform_settings::DevicePlatformSettings; @@ -78,9 +79,6 @@ pub enum Error { InvalidAcpiTableLength, #[error("invalid acpi table: unknown header signature {0:?}")] InvalidAcpiTableSignature([u8; 4]), - #[cfg(guest_arch = "x86_64")] - #[error("acpi tables require at least two mmio ranges")] - UnsupportedMmio, #[cfg(guest_arch = "aarch64")] #[error("expected GICv3 topology")] ExpectedGicV3, @@ -115,6 +113,7 @@ pub fn load( config: Config, caps: &virt::PartitionCapabilities, isolated: bool, + chipset_mmio: &ChipsetMmioRanges, ) -> Result { let context = match load_kind { LoadKind::None => { @@ -137,6 +136,7 @@ pub fn load( platform_config, caps, isolated, + chipset_mmio, )?; uefi_info.vp_context.clone() } @@ -188,6 +188,7 @@ pub fn load( processor_topology, platform_config, chipset_capabilities, + chipset_mmio, kernel_range: *kernel_range, kernel_entrypoint: *kernel_entrypoint, initrd: *initrd, @@ -236,6 +237,7 @@ struct LoadLinuxParams<'a> { processor_topology: &'a ProcessorTopology, platform_config: &'a DevicePlatformSettings, chipset_capabilities: VmChipsetCapabilities, + chipset_mmio: &'a ChipsetMmioRanges, /// The region of memory used by the kernel. kernel_range: MemoryRange, /// The entrypoint of the kernel. @@ -261,6 +263,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { processor_topology, platform_config, chipset_capabilities, + chipset_mmio, kernel_range, kernel_entrypoint, initrd, @@ -287,11 +290,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { }, }; - if mem_layout.mmio().len() < 2 { - return Err(Error::UnsupportedMmio); - } - - let acpi_tables = acpi_builder.build_acpi_tables(ACPI_BASE, |mem_layout, dsdt| { + let acpi_tables = acpi_builder.build_acpi_tables(ACPI_BASE, |dsdt| { dsdt.add_apic(); // Add serial ports if enabled. @@ -315,7 +314,7 @@ fn load_linux(params: LoadLinuxParams<'_>) -> Result { ); } - dsdt.add_mmio_module(mem_layout.mmio()[0], mem_layout.mmio()[1]); + dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); // TODO: change this once PCI is running in underhill dsdt.add_vmbus(false, None); dsdt.add_rtc(); @@ -425,6 +424,7 @@ pub fn write_uefi_config( platform_config: &DevicePlatformSettings, caps: &virt::PartitionCapabilities, isolated: bool, + chipset_mmio: &ChipsetMmioRanges, ) -> Result<(), Error> { use guest_emulation_transport::api::platform_settings::UefiConsoleMode; @@ -524,8 +524,7 @@ pub fn write_uefi_config( ) .add_raw( config::BlobStructureType::MmioRanges, - mem_layout - .mmio() + [chipset_mmio.low, chipset_mmio.high] .iter() .map(|range| config::Mmio { mmio_page_number_start: range.start() / HV_PAGE_SIZE, diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index e2f08e18bd..70fc03ad54 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -1158,13 +1158,28 @@ struct BuiltVtl0MemoryLayout { complete_memory_layout: MemoryLayout, } +/// Chipset MMIO ranges. +#[derive(Debug, Copy, Clone)] +pub(crate) struct ChipsetMmioRanges { + /// Chipset low MMIO range (below 4 GB) for VMOD/PCI0 _CRS. Always at + /// least the architectural reserved zone (LAPIC, IOAPIC, TPM, ...). + pub low: MemoryRange, + /// Chipset high MMIO range (above RAM) for VMOD/PCI0 _CRS. `EMPTY` when + /// no chipset high MMIO is configured. + pub high: MemoryRange, +} + /// Build the VTL0 memory map after carving out any memory requested for shared /// visibility memory to be used by VTL2. fn build_vtl0_memory_layout( vtl0_memory_map: Vec<(MemoryRangeWithNode, MemoryMapEntryType)>, - mmio: &[MemoryRange], + chipset_mmio: &ChipsetMmioRanges, mut shared_pool_size: u64, ) -> anyhow::Result { + let mmio: Vec = [chipset_mmio.low, chipset_mmio.high] + .into_iter() + .filter(|r| !r.is_empty()) + .collect(); // Allocate shared_pool memory starting from the last (top of memory) // continuing downward until the size is covered. // @@ -1220,13 +1235,13 @@ fn build_vtl0_memory_layout( .collect::>(); let vtl0_memory_layout = - MemoryLayout::new_from_ranges(&memory, mmio).context("invalid memory layout")?; + MemoryLayout::new_from_ranges(&memory, &mmio).context("invalid memory layout")?; let complete_memory = vtl0_memory_map .iter() .map(|(entry, _typ)| entry.clone()) .collect::>(); - let complete_memory_layout = MemoryLayout::new_from_ranges(&complete_memory, mmio) + let complete_memory_layout = MemoryLayout::new_from_ranges(&complete_memory, &mmio) .context("invalid complete memory layout")?; tracing::info!( @@ -1694,20 +1709,34 @@ async fn new_underhill_vm( anyhow::bail!("cannot run the VPCI relay without the VMBus relay"); } - let mut vtl0_mmio; - let (vtl0_mmio, vpci_relay_mmio) = if enable_vpci_relay { - // Carve out enough VTL0 MMIO space for 64 devices. + // Construct chipset MMIO ranges from the positional convention in the + // device tree: [0] = low (below 4 GiB), [1] = high (above RAM). + let mut chipset_mmio = ChipsetMmioRanges { + low: boot_info + .vtl0_mmio + .first() + .copied() + .unwrap_or(MemoryRange::EMPTY), + high: boot_info + .vtl0_mmio + .get(1) + .copied() + .unwrap_or(MemoryRange::EMPTY), + }; + + let vpci_relay_mmio = if enable_vpci_relay { + // Carve out enough VTL0 MMIO space from the high range for 64 devices. let required_len = 64 * vpci_relay::VPCI_RELAY_MMIO_PER_DEVICE; - vtl0_mmio = boot_info.vtl0_mmio.clone(); - if vtl0_mmio.last().is_none_or(|r| r.len() < required_len) { + if chipset_mmio.high.is_empty() || chipset_mmio.high.len() < required_len { anyhow::bail!("too little VTL0 MMIO space to take for the VPCI relay"); } - let r = vtl0_mmio.last().unwrap(); - let (rest, vpci) = r.split_at_offset(r.len() - required_len); - *vtl0_mmio.last_mut().unwrap() = rest; - (&vtl0_mmio, vpci) + let (rest, vpci) = chipset_mmio + .high + .split_at_offset(chipset_mmio.high.len() - required_len); + chipset_mmio.high = rest; + vpci } else { - (&boot_info.vtl0_mmio, MemoryRange::EMPTY) + MemoryRange::EMPTY }; let BuiltVtl0MemoryLayout { @@ -1715,7 +1744,7 @@ async fn new_underhill_vm( vtl0_memory_layout: mem_layout, shared_pool, complete_memory_layout, - } = build_vtl0_memory_layout(vtl0_memory_map, vtl0_mmio, shared_pool_size)?; + } = build_vtl0_memory_layout(vtl0_memory_map, &chipset_mmio, shared_pool_size)?; // Determine if x2apic is supported so that the topology matches // reality. @@ -2446,6 +2475,8 @@ async fn new_underhill_vm( let config = firmware_pcat::config::PcatBiosConfig { processor_topology: processor_topology.clone(), mem_layout: mem_layout.clone(), + chipset_low_mmio: chipset_mmio.low, + chipset_high_mmio: chipset_mmio.high, srat: acpi_builder.build_srat(), hibernation_enabled: dps.general.hibernation_enabled, initial_generation_id, @@ -3002,9 +3033,12 @@ async fn new_underhill_vm( // By default on aarch64 send all MMIO accesses to the host. None } else { - let mut untrusted_mmio_ranges: Vec<_> = mem_layout.mmio().to_vec(); + let chipset_mmio_ranges = [chipset_mmio.low, chipset_mmio.high] + .into_iter() + .filter(|r| !r.is_empty()); + let mut untrusted_mmio_ranges: Vec<_> = chipset_mmio_ranges.clone().collect(); if vtom > 0 { - untrusted_mmio_ranges.extend(mem_layout.mmio().iter().map(|range| { + untrusted_mmio_ranges.extend(chipset_mmio_ranges.map(|range| { MemoryRange::new((range.start() + vtom)..(range.end() + vtom)) })); } @@ -3592,6 +3626,7 @@ async fn new_underhill_vm( load_kind, &dps, isolation.is_isolated(), + &chipset_mmio, ) .instrument(tracing::info_span!("load_firmware", CVM_ALLOWED)) .await?; @@ -3871,6 +3906,7 @@ async fn load_firmware( load_kind: LoadKind, dps: &DevicePlatformSettings, isolated: bool, + chipset_mmio: &ChipsetMmioRanges, ) -> Result<(), anyhow::Error> { let cmdline_append = match cmdline_append { Some(cmdline) => CString::new(cmdline.as_bytes()).context("bad command line")?, @@ -3891,19 +3927,19 @@ async fn load_firmware( loader_config, caps, isolated, + chipset_mmio, ) .context("failed to load firmware")?; #[cfg(guest_arch = "x86_64")] let registers = { let crate::loader::VpContext::Vbs(mut registers) = vtl0_vp_context; - registers.extend( - loader::common::compute_variable_mtrrs( - mem_layout, - partition.caps().physical_address_width, - ) - .context("Failed to compute variable mtrrs")?, - ); + registers.extend(loader::common::compute_variable_mtrrs( + mem_layout, + partition.caps().physical_address_width, + chipset_mmio.low, + chipset_mmio.high, + )); registers }; #[cfg(guest_arch = "aarch64")] diff --git a/openvmm/openvmm_core/src/worker/dispatch.rs b/openvmm/openvmm_core/src/worker/dispatch.rs index 7acf844917..200c1e2904 100644 --- a/openvmm/openvmm_core/src/worker/dispatch.rs +++ b/openvmm/openvmm_core/src/worker/dispatch.rs @@ -9,6 +9,7 @@ use crate::emuplat; use crate::partition::BindHvliteVp; use crate::partition::HvlitePartition; use crate::vmgs_non_volatile_store::HvLiteVmgsNonVolatileStore; +use crate::worker::memory_layout::ChipsetMmioRanges; use crate::worker::memory_layout::MemoryLayoutInput; use crate::worker::memory_layout::ResolvedPcieRootComplexRanges; use crate::worker::memory_layout::resolve_memory_layout; @@ -415,9 +416,8 @@ pub(crate) struct InitializedVm { mem_layout: MemoryLayout, resolved_pcie_root_complex_ranges: Vec, virtio_mmio_region: MemoryRange, - chipset_low_mmio: MemoryRange, - chipset_high_mmio: MemoryRange, - vtl2_chipset_mmio: MemoryRange, + chipset_mmio: ChipsetMmioRanges, + vtl2_framebuffer_gpa_base: Option, #[cfg(guest_arch = "aarch64")] resolved_smmu_resources: Vec, processor_topology: ProcessorTopology, @@ -720,12 +720,8 @@ struct LoadedVmInner { virtio_mmio_region: MemoryRange, #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))] virtio_mmio_irq: u32, - /// Chipset low MMIO range for VMOD/PCI0 _CRS. - chipset_low_mmio: MemoryRange, - /// Chipset high MMIO range for VMOD/PCI0 _CRS. - chipset_high_mmio: MemoryRange, - /// VTL2-private chipset MMIO range for VTL2 VMBus. - vtl2_chipset_mmio: MemoryRange, + /// Resolved chipset MMIO ranges. + chipset_mmio: ChipsetMmioRanges, /// ((device, function), interrupt) #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))] pci_legacy_interrupts: Vec<((u8, Option), u32)>, @@ -1003,6 +999,14 @@ impl InitializedVm { #[cfg(not(guest_arch = "aarch64"))] let smmu_count = 0; + let vtl2_framebuffer_size = if cfg.vtl2_gfx { + cfg.framebuffer + .as_ref() + .context("no framebuffer configured")? + .len() as u64 + } else { + 0 + }; let resolved_layout = resolve_memory_layout(MemoryLayoutInput { mem_size: cfg.memory.mem_size, numa_mem_sizes: cfg.memory.numa_mem_sizes.as_deref(), @@ -1011,15 +1015,14 @@ impl InitializedVm { virtio_mmio_count, smmu_count, vtl2_layout, + vtl2_framebuffer_size, physical_address_size, }) .context("invalid memory configuration")?; let mem_layout = resolved_layout.memory_layout; let resolved_pcie_root_complex_ranges = resolved_layout.pcie_root_complex_ranges; let virtio_mmio_region = resolved_layout.virtio_mmio_region; - let chipset_low_mmio = resolved_layout.chipset_low_mmio; - let chipset_high_mmio = resolved_layout.chipset_high_mmio; - let vtl2_chipset_mmio = resolved_layout.vtl2_chipset_mmio; + let chipset_mmio = resolved_layout.chipset_mmio; // Combine SMMU MMIO ranges with SPI layout. #[cfg(guest_arch = "aarch64")] @@ -1156,9 +1159,8 @@ impl InitializedVm { mem_layout, resolved_pcie_root_complex_ranges, virtio_mmio_region, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, + vtl2_framebuffer_gpa_base: resolved_layout.vtl2_framebuffer_gpa_base, #[cfg(guest_arch = "aarch64")] resolved_smmu_resources, processor_topology, @@ -1189,9 +1191,8 @@ impl InitializedVm { mem_layout, resolved_pcie_root_complex_ranges, virtio_mmio_region, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, + vtl2_framebuffer_gpa_base, #[cfg(guest_arch = "aarch64")] resolved_smmu_resources, processor_topology, @@ -1404,6 +1405,8 @@ impl InitializedVm { firmware_pcat::config::PcatBiosConfig { processor_topology: processor_topology.clone(), mem_layout: mem_layout.clone(), + chipset_low_mmio: chipset_mmio.low, + chipset_high_mmio: chipset_mmio.high, srat, hibernation_enabled: false, @@ -1452,25 +1455,9 @@ impl InitializedVm { _ => {} }; - let vtl2_framebuffer_gpa_base = if cfg.vtl2_gfx { - // calculate a safe place to put the framebuffer mapping in GPA space - // this places it after the end of ram at the first place it won't overlap with MMIO - let len = cfg - .framebuffer - .as_ref() - .context("no framebuffer configured")? - .len(); - let mut gpa = mem_layout.end_of_ram(); - for mmio in mem_layout.mmio() { - if gpa < mmio.end() && mmio.start() < gpa + len as u64 { - gpa = mmio.end(); - } - } + if let Some(gpa) = vtl2_framebuffer_gpa_base { tracing::debug!("Vtl2 framebuffer gpa base: {:#x}", gpa); - Some(gpa) - } else { - None - }; + } let state_units = StateUnits::new(); @@ -2683,9 +2670,7 @@ impl InitializedVm { load_mode: cfg.load_mode, virtio_mmio_region, virtio_mmio_irq, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, pci_legacy_interrupts, igvm_file, next_igvm_file: None, @@ -2784,20 +2769,17 @@ impl LoadedVmInner { cmdline, mem_layout: &self.mem_layout, }; - if custom_dsdt.is_none() && self.mem_layout.mmio().len() < 2 { - anyhow::bail!("at least two mmio regions are required"); - } let regs = super::vm_loaders::linux::load_linux_x86(&kernel_config, &self.gm, |gpa| { let tables = if let Some(dsdt) = custom_dsdt { acpi_builder.build_acpi_tables_custom_dsdt(gpa, dsdt) } else { - acpi_builder.build_acpi_tables(gpa, |mem_layout, dsdt| { + acpi_builder.build_acpi_tables(gpa, |dsdt| { add_devices_to_dsdt_x64( - mem_layout, dsdt, &self.chipset_cfg, enable_serial, + &self.chipset_mmio, self.virtio_mmio_region, self.virtio_mmio_irq, &self.pci_legacy_interrupts, @@ -2831,11 +2813,15 @@ impl LoadedVmInner { mem_layout: &self.mem_layout, }; - let with_hv = self.hypervisor_cfg.with_hv; let build_acpi = if boot_mode == LinuxDirectBootMode::Acpi { Some(|rsdp_gpa: u64| { - acpi_builder.build_acpi_tables(rsdp_gpa, |mem_layout, dsdt| { - add_devices_to_dsdt_arm64(mem_layout, dsdt, enable_serial, with_hv) + acpi_builder.build_acpi_tables(rsdp_gpa, |dsdt| { + add_devices_to_dsdt_arm64( + dsdt, + enable_serial, + &self.chipset_mmio, + self.hypervisor_cfg.with_hv, + ) }) }) } else { @@ -2849,6 +2835,7 @@ impl LoadedVmInner { &self.processor_topology, &self.pcie_host_bridges, &self.smmu_configs, + &self.chipset_mmio, build_acpi, )?; @@ -2891,6 +2878,7 @@ impl LoadedVmInner { &self.mem_layout, &self.pcie_host_bridges, load_settings, + &self.chipset_mmio, &madt, &srat, mcfg.as_deref(), @@ -2935,9 +2923,7 @@ impl LoadedVmInner { with_vmbus_redirect: self.vmbus_redirect, com_serial, entropy: Some(&entropy), - chipset_low_mmio: self.chipset_low_mmio, - chipset_high_mmio: self.chipset_high_mmio, - vtl2_chipset_mmio: self.vtl2_chipset_mmio, + chipset_mmio: self.chipset_mmio, }; super::vm_loaders::igvm::load_igvm(params)? } @@ -2951,13 +2937,12 @@ impl LoadedVmInner { // VTL2 will setup MTRRs for VTL0 if needed. #[cfg(guest_arch = "x86_64")] if self.hypervisor_cfg.with_vtl2.is_none() { - regs.extend( - loader::common::compute_variable_mtrrs( - &self.mem_layout, - self.partition.caps().physical_address_width, - ) - .context("failed to compute variable mtrrs")?, - ); + regs.extend(loader::common::compute_variable_mtrrs( + &self.mem_layout, + self.partition.caps().physical_address_width, + self.chipset_mmio.low, + self.chipset_mmio.high, + )); } // Only set initial page visibility on isolated partitions. @@ -3554,10 +3539,10 @@ impl LoadedVm { #[cfg_attr(not(guest_arch = "x86_64"), expect(dead_code))] fn add_devices_to_dsdt_x64( - mem_layout: &MemoryLayout, dsdt: &mut dsdt::Dsdt, cfg: &BaseChipsetManifest, serial_uarts: bool, + chipset_mmio: &ChipsetMmioRanges, virtio_mmio_region: MemoryRange, virtio_mmio_irq: u32, pci_legacy_interrupts: &[((u8, Option), u32)], // ((device, function), interrupt) @@ -3579,36 +3564,29 @@ fn add_devices_to_dsdt_x64( } } - assert!( - mem_layout.mmio().len() >= 2, - "the DSDT describes two MMIO regions" - ); - let low_mmio_gap = mem_layout.mmio()[0]; - let high_mmio_gap = mem_layout.mmio()[1]; - // Virtio-mmio devices are allocated as a contiguous region by the memory // layout resolver. Each 4 KiB slot is a separate device. - { - for i in 0..virtio_mmio_region.page_count_4k() { - let slot_base = virtio_mmio_region.start() + i * HV_PAGE_SIZE; - let mut device = dsdt::Device::new(format!("\\_SB.VI{i:02}").as_bytes()); - device.add_object(&dsdt::NamedString::new(b"_HID", b"LNRO0005")); - device.add_object(&dsdt::NamedInteger::new(b"_UID", i)); - let mut crs = dsdt::CurrentResourceSettings::new(); - crs.add_resource(&dsdt::QwordMemory::new(slot_base, HV_PAGE_SIZE)); - let mut intr = dsdt::Interrupt::new(virtio_mmio_irq); - intr.is_edge_triggered = false; - crs.add_resource(&intr); - device.add_object(&crs); - dsdt.add_object(&device); - } + for i in 0..virtio_mmio_region.page_count_4k() { + let slot_base = virtio_mmio_region.start() + i * HV_PAGE_SIZE; + let mut device = dsdt::Device::new(format!("\\_SB.VI{i:02}").as_bytes()); + device.add_object(&dsdt::NamedString::new(b"_HID", b"LNRO0005")); + device.add_object(&dsdt::NamedInteger::new(b"_UID", i)); + let mut crs = dsdt::CurrentResourceSettings::new(); + crs.add_resource(&dsdt::QwordMemory::new(slot_base, HV_PAGE_SIZE)); + let mut intr = dsdt::Interrupt::new(virtio_mmio_irq); + intr.is_edge_triggered = false; + crs.add_resource(&intr); + device.add_object(&crs); + dsdt.add_object(&device); } + // The chipset MMIO module or PCI bus describes the chipset low/high + // MMIO regions to the guest. if cfg.with_generic_pci_bus || cfg.with_i440bx_host_pci_bridge { // TODO: actually plumb through legacy PCI interrupts - dsdt.add_pci(low_mmio_gap, high_mmio_gap, pci_legacy_interrupts); + dsdt.add_pci(chipset_mmio.low, chipset_mmio.high, pci_legacy_interrupts); } else { - dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap); + dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); } dsdt.add_vmbus( @@ -3620,9 +3598,9 @@ fn add_devices_to_dsdt_x64( #[cfg(guest_arch = "aarch64")] fn add_devices_to_dsdt_arm64( - mem_layout: &MemoryLayout, dsdt: &mut dsdt::Dsdt, enable_serial: bool, + chipset_mmio: &ChipsetMmioRanges, with_hv: bool, ) { // VMBus GIC INTID (PPI 2 = INTID 16 + 2 = 18), matching the DT path. @@ -3636,16 +3614,8 @@ fn add_devices_to_dsdt_arm64( const PL011_SERIAL1_GSIV: u32 = 34; if with_hv { - // Internal invariant: the memory layout for ARM64 with HV always has - // at least two MMIO gaps (low + high). This is configured by OpenVMM - // itself, not by guest input. - assert!( - mem_layout.mmio().len() >= 2, - "need at least two MMIO regions" - ); - let low_mmio_gap = mem_layout.mmio()[0]; - let high_mmio_gap: MemoryRange = mem_layout.mmio()[1]; - dsdt.add_mmio_module(low_mmio_gap, high_mmio_gap); + dsdt.add_mmio_module(chipset_mmio.low, chipset_mmio.high); + // VMBus on ARM64 ACPI needs a per-CPU interrupt (PPI) in _CRS. // Always place under VMOD, not PCI0 — ARM64 doesn't use the x86 // PCI0 DSDT node. diff --git a/openvmm/openvmm_core/src/worker/memory_layout.rs b/openvmm/openvmm_core/src/worker/memory_layout.rs index 0ecd36c445..300540320e 100644 --- a/openvmm/openvmm_core/src/worker/memory_layout.rs +++ b/openvmm/openvmm_core/src/worker/memory_layout.rs @@ -49,6 +49,20 @@ const PCIE_ECAM_BYTES_PER_BUS: u64 = 32 * 8 * 4096; /// value, independent of any individual root complex's configuration. const PCIE_ECAM_MIN_ADDRESS: u64 = 256 * 1024 * 1024; +/// Resolved chipset MMIO ranges produced by the memory layout engine. +#[derive(Debug, Copy, Clone)] +pub(crate) struct ChipsetMmioRanges { + /// Chipset low MMIO range (below 4 GB) for VMOD/PCI0 _CRS. Always at + /// least the architectural reserved zone (LAPIC, IOAPIC, TPM, ...). + pub low: MemoryRange, + /// Chipset high MMIO range (above RAM) for VMOD/PCI0 _CRS. `EMPTY` when + /// no chipset high MMIO is configured. + pub high: MemoryRange, + /// VTL2-private chipset MMIO range, reported to VTL2 VMBus via the device + /// tree. `EMPTY` when VTL2 is not configured or has no chipset MMIO. + pub vtl2: MemoryRange, +} + #[derive(Debug)] pub(super) struct ResolvedMemoryLayout { pub memory_layout: MemoryLayout, @@ -57,15 +71,11 @@ pub(super) struct ResolvedMemoryLayout { /// 4 KiB, indexed from the start of the region. `EMPTY` when no /// virtio-mmio devices are configured. pub virtio_mmio_region: MemoryRange, - /// Chipset low MMIO range (below 4 GB) for VMOD/PCI0 _CRS. Always at - /// least the architectural reserved zone (LAPIC, IOAPIC, TPM, ...). - pub chipset_low_mmio: MemoryRange, - /// Chipset high MMIO range (above RAM) for VMOD/PCI0 _CRS. `EMPTY` when - /// no chipset high MMIO is configured. - pub chipset_high_mmio: MemoryRange, - /// VTL2-private chipset MMIO range, reported to VTL2 VMBus via the device - /// tree. `EMPTY` when VTL2 is not configured or has no chipset MMIO. - pub vtl2_chipset_mmio: MemoryRange, + /// Resolved chipset MMIO ranges. + pub chipset_mmio: ChipsetMmioRanges, + /// Resolved VTL2 framebuffer GPA base. `None` when VTL2 graphics is not + /// configured. + pub vtl2_framebuffer_gpa_base: Option, /// Resolved MMIO ranges for SMMUv3 instances, one per configured SMMU. /// Each range is `SMMU_SIZE` bytes. Empty when no SMMUs are configured. #[cfg_attr(not(guest_arch = "aarch64"), expect(dead_code))] @@ -102,6 +112,10 @@ pub(super) struct MemoryLayoutInput<'a> { /// Optional IGVM VTL2 private-memory request. This is allocated after all /// VTL0-visible RAM and MMIO and is carried separately from ordinary RAM. pub vtl2_layout: Option, + /// Size in bytes of the VTL2 framebuffer mapping. When non-zero, a + /// `PostMmio` allocation is created and the resolved GPA is returned in + /// `ResolvedMemoryLayout::vtl2_framebuffer_gpa_base`. + pub vtl2_framebuffer_size: u64, /// Host-supported physical address width used only after allocation. The /// allocator computes the smallest layout it can; host fit is validation. pub physical_address_size: u8, @@ -333,6 +347,19 @@ pub(super) fn resolve_memory_layout( ); } + // VTL2 framebuffer: a page-aligned PostMmio allocation so the GPA does + // not overlap RAM or any MMIO range. + let mut vtl2_framebuffer_range = MemoryRange::EMPTY; + if input.vtl2_framebuffer_size != 0 { + builder.request( + "vtl2-framebuffer", + &mut vtl2_framebuffer_range, + input.vtl2_framebuffer_size, + PAGE_SIZE, + Placement::PostMmio, + ); + } + let placed_ranges = builder .allocate() .context("allocating memory layout ranges")?; @@ -436,9 +463,16 @@ pub(super) fn resolve_memory_layout( memory_layout, pcie_root_complex_ranges, virtio_mmio_region, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio: ChipsetMmioRanges { + low: chipset_low_mmio, + high: chipset_high_mmio, + vtl2: vtl2_chipset_mmio, + }, + vtl2_framebuffer_gpa_base: if vtl2_framebuffer_range.is_empty() { + None + } else { + Some(vtl2_framebuffer_range.start()) + }, smmu_ranges, }) } @@ -542,6 +576,7 @@ mod tests { virtio_mmio_count: 0, smmu_count: 0, vtl2_layout, + vtl2_framebuffer_size: 0, physical_address_size: 46, } } @@ -619,8 +654,8 @@ mod tests { fn chipset_mmio_is_resolved() { let result = resolve_memory_layout(input(2 * GB, None, None)).unwrap(); - let low = result.chipset_low_mmio; - let high = result.chipset_high_mmio; + let low = result.chipset_mmio.low; + let high = result.chipset_mmio.high; assert_eq!(low.len(), DEFAULT_CHIPSET_LOW_MMIO_SIZE as u64); assert_eq!(high.len(), DEFAULT_CHIPSET_HIGH_MMIO_SIZE); // Chipset low MMIO is pinned to end at 4 GiB and must fully contain @@ -841,10 +876,10 @@ mod tests { let result = resolve_memory_layout(config).unwrap(); - let vtl2_mmio = result.vtl2_chipset_mmio; + let vtl2_mmio = result.chipset_mmio.vtl2; assert_eq!(vtl2_mmio.len(), DEFAULT_VTL2_CHIPSET_MMIO_SIZE); // VTL2 chipset MMIO should be after all VTL0-visible ranges. - let chipset_high = result.chipset_high_mmio; + let chipset_high = result.chipset_mmio.high; assert!( vtl2_mmio.start() >= chipset_high.end(), "VTL2 chipset MMIO should be after VTL0 high MMIO" @@ -874,10 +909,10 @@ mod tests { let result = resolve_memory_layout(config).unwrap(); - let low = result.chipset_low_mmio; + let low = result.chipset_mmio.low; assert_eq!(low.end(), 4 * GB); assert!(low.contains(&ARCH_RESERVED)); - assert!(result.chipset_high_mmio.is_empty()); + assert!(result.chipset_mmio.high.is_empty()); // The reported ranges must appear in MemoryLayout::mmio() preserving // the positional contract: [0] = low, [1] = high (EMPTY placeholder). assert_eq!(result.memory_layout.mmio(), &[low, MemoryRange::EMPTY]); @@ -890,16 +925,16 @@ mod tests { let mut config = input(2 * GB, None, None); config.layout.chipset_high_mmio_size = 0; let result = resolve_memory_layout(config).unwrap(); - assert!(!result.chipset_low_mmio.is_empty()); - assert!(result.chipset_high_mmio.is_empty()); + assert!(!result.chipset_mmio.low.is_empty()); + assert!(result.chipset_mmio.high.is_empty()); let mut config = input(2 * GB, None, None); config.layout.chipset_low_mmio_size = 0; let result = resolve_memory_layout(config).unwrap(); // Low is always at least the arch reserved zone. - assert!(!result.chipset_low_mmio.is_empty()); + assert!(!result.chipset_mmio.low.is_empty()); // High is still configured in this case. - assert!(!result.chipset_high_mmio.is_empty()); + assert!(!result.chipset_mmio.high.is_empty()); } #[test] @@ -945,4 +980,42 @@ mod tests { assert!(err.to_string().contains("ECAM"), "unexpected error: {err}"); } + + #[test] + fn vtl2_framebuffer_allocation() { + let framebuffer_size = 8 * MB; + let mut config = input(2 * GB, None, None); + config.vtl2_framebuffer_size = framebuffer_size; + let result = resolve_memory_layout(config).unwrap(); + + let gpa_base = result + .vtl2_framebuffer_gpa_base + .expect("framebuffer GPA should be allocated"); + // Must be page-aligned. + assert_eq!(gpa_base % PAGE_SIZE, 0, "framebuffer GPA not page-aligned"); + let fb_range = MemoryRange::new(gpa_base..gpa_base + framebuffer_size); + // Must not overlap RAM. + for ram in result.memory_layout.ram() { + assert!( + !fb_range.overlaps(&ram.range), + "framebuffer {fb_range} overlaps RAM {}", + ram.range, + ); + } + // Must not overlap chipset MMIO. + for mmio in result.memory_layout.mmio() { + assert!( + !fb_range.overlaps(mmio), + "framebuffer {fb_range} overlaps MMIO {mmio}", + ); + } + } + + #[test] + fn vtl2_framebuffer_zero_size_returns_none() { + let mut config = input(2 * GB, None, None); + config.vtl2_framebuffer_size = 0; + let result = resolve_memory_layout(config).unwrap(); + assert!(result.vtl2_framebuffer_gpa_base.is_none()); + } } diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs b/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs index 9305c8dd0f..28b9e9be53 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/igvm.rs @@ -3,6 +3,7 @@ //! Loader implementation to load IGVM files. +use super::super::memory_layout::ChipsetMmioRanges; use guestmem::GuestMemory; use hvdef::HV_PAGE_SIZE; use igvm::IgvmDirectiveHeader; @@ -263,9 +264,7 @@ struct BuildDeviceTreeParams<'a> { with_vmbus_redirect: bool, com_serial: Option, entropy: Option<&'a [u8]>, - chipset_low_mmio: MemoryRange, - chipset_high_mmio: MemoryRange, - vtl2_chipset_mmio: MemoryRange, + chipset_mmio: ChipsetMmioRanges, } /// Build a device tree representing the whole guest partition. @@ -279,11 +278,15 @@ fn build_device_tree(params: BuildDeviceTreeParams<'_>) -> Result, fdt:: with_vmbus_redirect, com_serial, entropy, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, } = params; + let ChipsetMmioRanges { + low: chipset_low_mmio, + high: chipset_high_mmio, + vtl2: vtl2_chipset_mmio, + } = chipset_mmio; + let mut buf = vec![0; HV_PAGE_SIZE as usize * 256]; let mut builder = fdt::builder::Builder::new(fdt::builder::BuilderConfig { @@ -523,12 +526,8 @@ pub struct LoadIgvmParams<'a, T: ArchTopology> { pub com_serial: Option, /// Entropy pub entropy: Option<&'a [u8]>, - /// VTL0 chipset low MMIO range for the device tree VMBus node. - pub chipset_low_mmio: MemoryRange, - /// VTL0 chipset high MMIO range for the device tree VMBus node. - pub chipset_high_mmio: MemoryRange, - /// VTL2-private chipset MMIO range for the device tree VTL2 VMBus node. - pub vtl2_chipset_mmio: MemoryRange, + /// Resolved chipset MMIO ranges for device tree and UEFI config. + pub chipset_mmio: ChipsetMmioRanges, } pub fn load_igvm( @@ -571,11 +570,15 @@ fn load_igvm_x86( with_vmbus_redirect, com_serial, entropy, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, } = params; + let ChipsetMmioRanges { + low: chipset_low_mmio, + high: chipset_high_mmio, + .. + } = chipset_mmio; + let relocations_enabled = match vtl2_base_address { Vtl2BaseAddressType::File | Vtl2BaseAddressType::Vtl2Allocate { .. } => false, Vtl2BaseAddressType::Absolute(_) | Vtl2BaseAddressType::MemoryLayout { .. } => true, @@ -985,9 +988,7 @@ fn load_igvm_x86( with_vmbus_redirect, com_serial, entropy, - chipset_low_mmio, - chipset_high_mmio, - vtl2_chipset_mmio, + chipset_mmio, }) .map_err(Error::DeviceTree)?; import_parameter(&mut parameter_areas, info, &dt)?; diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs index 22c0600443..3fbcc57cc1 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/linux.rs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use crate::worker::memory_layout::ChipsetMmioRanges; use guestmem::GuestMemory; use loader::importer::Aarch64Register; use loader::importer::X86Register; @@ -10,6 +11,7 @@ use loader::linux::InitrdAddressType; use loader::linux::InitrdConfig; use loader::linux::RegisterConfig; use loader::linux::ZeroPageConfig; +use memory_range::MemoryRange; use std::ffi::CString; use std::io::Seek; use thiserror::Error; @@ -148,6 +150,8 @@ fn build_dt( processor_topology: &ProcessorTopology, pcie_host_bridges: &[PcieHostBridge], smmu_configs: &[vmm_core::acpi_builder::AcpiSmmuConfig], + chipset_low_mmio: MemoryRange, + chipset_high_mmio: MemoryRange, initrd_start: u64, initrd_end: u64, ) -> Result, fdt::builder::Error> { @@ -540,13 +544,7 @@ fn build_dt( } } - // Build VMBus MMIO ranges from the memory layout's chipset MMIO gaps. - assert!( - cfg.mem_layout.mmio().len() >= 2, - "need at least two MMIO regions for VMBus DT node" - ); - let low_mmio_gap = cfg.mem_layout.mmio()[0]; - let high_mmio_gap = cfg.mem_layout.mmio()[1]; + // Build VMBus MMIO ranges from the chipset MMIO ranges. soc = soc .start_node("vmbus")? .add_u32(p_address_cells, 2)? @@ -555,10 +553,10 @@ fn build_dt( .add_u64_array( p_ranges, &[ - low_mmio_gap.start(), - low_mmio_gap.len(), - high_mmio_gap.start(), - high_mmio_gap.len(), + chipset_low_mmio.start(), + chipset_low_mmio.len(), + chipset_high_mmio.start(), + chipset_high_mmio.len(), ], )? .add_str(p_compatible, "microsoft,vmbus")? @@ -835,6 +833,7 @@ pub fn load_linux_arm64( processor_topology: &ProcessorTopology, pcie_host_bridges: &[PcieHostBridge], smmu_configs: &[vmm_core::acpi_builder::AcpiSmmuConfig], + chipset_mmio: &ChipsetMmioRanges, build_acpi: Option vmm_core::acpi_builder::BuiltAcpiTables>, ) -> Result, Error> { let mut loader = Loader::new(gm.clone(), cfg.mem_layout, hvdef::Vtl::Vtl0); @@ -896,6 +895,8 @@ pub fn load_linux_arm64( processor_topology, pcie_host_bridges, smmu_configs, + chipset_mmio.low, + chipset_mmio.high, initrd_start, initrd_end, ) diff --git a/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs b/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs index 1865c64ad4..0880e84329 100644 --- a/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs +++ b/openvmm/openvmm_core/src/worker/vm_loaders/uefi.rs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use crate::worker::memory_layout::ChipsetMmioRanges; use guestmem::GuestMemory; use guid::Guid; use hvdef::HV_PAGE_SIZE; @@ -23,8 +24,6 @@ pub enum Error { Firmware(#[source] std::io::Error), #[error("uefi loader error")] Loader(#[source] loader::uefi::Error), - #[error("UEFI requires at least two MMIO ranges")] - UnsupportedMmio, #[error("failed to build PCIe ACPI tables")] PcieAcpi(#[source] vmm_core::acpi_builder::PcieAcpiBuildError), #[cfg(guest_arch = "aarch64")] @@ -56,15 +55,12 @@ pub fn load_uefi( mem_layout: &MemoryLayout, pcie_host_bridges: &[PcieHostBridge], load_settings: UefiLoadSettings, + chipset_mmio: &ChipsetMmioRanges, madt: &[u8], srat: &[u8], mcfg: Option<&[u8]>, pptt: Option<&[u8]>, ) -> Result, Error> { - if mem_layout.mmio().len() < 2 { - return Err(Error::UnsupportedMmio); - } - let mut loaded_image; let image = { loaded_image = Vec::new(); @@ -89,9 +85,6 @@ pub fn load_uefi( }) .collect(); - let low_mmio = mem_layout.mmio()[0]; - let high_mmio = mem_layout.mmio()[1]; - let flags = config::Flags::new() .with_hibernate_enabled(true) .with_serial_controllers_enabled(load_settings.serial) @@ -136,12 +129,12 @@ pub fn load_uefi( .add(&config::Entropy(entropy)) .add(&config::MmioRanges([ config::Mmio { - mmio_page_number_start: low_mmio.start() / HV_PAGE_SIZE, - mmio_size_in_pages: (low_mmio.end() - low_mmio.start()) / HV_PAGE_SIZE, + mmio_page_number_start: chipset_mmio.low.start() / HV_PAGE_SIZE, + mmio_size_in_pages: chipset_mmio.low.len() / HV_PAGE_SIZE, }, config::Mmio { - mmio_page_number_start: high_mmio.start() / HV_PAGE_SIZE, - mmio_size_in_pages: (high_mmio.end() - high_mmio.start()) / HV_PAGE_SIZE, + mmio_page_number_start: chipset_mmio.high.start() / HV_PAGE_SIZE, + mmio_size_in_pages: chipset_mmio.high.len() / HV_PAGE_SIZE, }, ])) .add(&config::ProcessorInformation { diff --git a/vm/acpi/src/dsdt.rs b/vm/acpi/src/dsdt.rs index e3584dda3b..5081302cb8 100644 --- a/vm/acpi/src/dsdt.rs +++ b/vm/acpi/src/dsdt.rs @@ -233,8 +233,12 @@ impl Dsdt { vmod.add_object(&NamedString::new(b"_HID", b"ACPI0004")); vmod.add_object(&NamedInteger::new(b"_UID", 0)); let mut vmod_crs = CurrentResourceSettings::new(); - vmod_crs.add_resource(&QwordMemory::new(low.start(), low.end() - low.start())); - vmod_crs.add_resource(&QwordMemory::new(high.start(), high.end() - high.start())); + if !low.is_empty() { + vmod_crs.add_resource(&QwordMemory::new(low.start(), low.len())); + } + if !high.is_empty() { + vmod_crs.add_resource(&QwordMemory::new(high.start(), high.len())); + } vmod.add_object(&vmod_crs); self.add_object(&vmod); } @@ -292,8 +296,10 @@ impl Dsdt { let mut crs = CurrentResourceSettings::new(); crs.add_resource(&BusNumber::new(0, 1)); crs.add_resource(&IoPort::new(0xcf8, 0xcf8, 8)); - crs.add_resource(&QwordMemory::new(low.start(), low.end() - low.start())); - crs.add_resource(&QwordMemory::new(high.start(), high.end() - high.start())); + crs.add_resource(&QwordMemory::new(low.start(), low.len())); + if !high.is_empty() { + crs.add_resource(&QwordMemory::new(high.start(), high.len())); + } pci0.add_object(&crs); self.add_object(&pci0); } diff --git a/vm/devices/firmware/firmware_pcat/Cargo.toml b/vm/devices/firmware/firmware_pcat/Cargo.toml index 57f48d0390..91aca142ed 100644 --- a/vm/devices/firmware/firmware_pcat/Cargo.toml +++ b/vm/devices/firmware/firmware_pcat/Cargo.toml @@ -11,6 +11,7 @@ generation_id.workspace = true chipset_device.workspace = true guestmem.workspace = true +memory_range.workspace = true vmcore.workspace = true vm_topology = { workspace = true, features = ["inspect"] } @@ -26,8 +27,5 @@ tracelimit.workspace = true tracing.workspace = true zerocopy.workspace = true -[dev-dependencies] -memory_range.workspace = true - [lints] workspace = true diff --git a/vm/devices/firmware/firmware_pcat/src/lib.rs b/vm/devices/firmware/firmware_pcat/src/lib.rs index 06e37fa0b9..d157079327 100644 --- a/vm/devices/firmware/firmware_pcat/src/lib.rs +++ b/vm/devices/firmware/firmware_pcat/src/lib.rs @@ -51,6 +51,7 @@ use zerocopy::IntoBytes; pub mod config { use guid::Guid; use inspect::Inspect; + use memory_range::MemoryRange; use vm_topology::memory::MemoryLayout; use vm_topology::processor::ProcessorTopology; use vm_topology::processor::x86::X86Topology; @@ -118,6 +119,10 @@ pub mod config { pub processor_topology: ProcessorTopology, /// The VM's memory layout pub mem_layout: MemoryLayout, + /// Chipset low MMIO range (below 4 GB). + pub chipset_low_mmio: MemoryRange, + /// Chipset high MMIO range (above RAM). + pub chipset_high_mmio: MemoryRange, /// The SRAT ACPI table reflected into the guest pub srat: Vec, /// Initial [Generation Id](generation_id) value @@ -243,8 +248,8 @@ const POST_IO_PORT: u16 = 0x80; #[derive(Debug, Error)] #[expect(missing_docs)] // self-explanatory variants pub enum PcatBiosDeviceInitError { - #[error("expected exactly 2 mmio holes, found {0}")] - IncorrectMmioHoles(usize), + #[error("PCAT requires non-empty chipset low and high MMIO ranges")] + IncorrectMmioHoles, #[error("invalid ROM size {0:x} bytes, expected 256KB")] InvalidRomSize(u64), #[error("error mapping ROM")] @@ -269,10 +274,8 @@ impl PcatBiosDevice { let initial_generation_id = config.initial_generation_id; - if config.mem_layout.mmio().len() != 2 { - return Err(PcatBiosDeviceInitError::IncorrectMmioHoles( - config.mem_layout.mmio().len(), - )); + if config.chipset_low_mmio.is_empty() || config.chipset_high_mmio.is_empty() { + return Err(PcatBiosDeviceInitError::IncorrectMmioHoles); } let mut rom_mems = Vec::new(); @@ -384,7 +387,14 @@ impl PcatBiosDevice { // Consumers: // - vmbios/source/bsp/em/smbios/Smbport.asm, // - core/src/MEM.ASM. - self.config.mem_layout.ram_above_4gb().to_mb() + self.config + .mem_layout + .ram() + .iter() + .filter(|r| r.range.end() >= 0x1_0000_0000) + .map(|r| r.range.len()) + .sum::() + .to_mb() } PcatAddress::SLEEP_STATES => { // The AMI BIOS wants to read a byte value of flags to determine @@ -402,7 +412,7 @@ impl PcatBiosDevice { } } PcatAddress::PCI_IO_GAP_START => { - self.config.mem_layout.mmio()[0].start().try_into().unwrap() + self.config.chipset_low_mmio.start().try_into().unwrap() } PcatAddress::PROCESSOR_STA_ENABLE => { // Read by the ACPI _STA (status) method in the Processor @@ -424,15 +434,18 @@ impl PcatBiosDevice { // - core/src/MEM.ASM. self.config .mem_layout - .ram_above_high_mmio() - .expect("validated exactly 2 mmio ranges") + .ram() + .iter() + .filter(|r| r.range.start() >= self.config.chipset_high_mmio.end()) + .map(|r| r.range.len()) + .sum::() .to_mb() } PcatAddress::HIGH_MMIO_GAP_BASE_IN_MB => { // Consumers: // - vmbios/source/bsp/em/smbios/Smbport.asm, // - core/src/MEM.ASM. - self.config.mem_layout.mmio()[1].start().to_mb() + self.config.chipset_high_mmio.start().to_mb() } PcatAddress::HIGH_MMIO_GAP_LENGTH_IN_MB => { // Consumers: @@ -444,7 +457,7 @@ impl PcatBiosDevice { // this code was written in Hyper-V, the `end - start` // calculation used an _inclusive_ `start..=end` range from the // MMIO gaps API, which wasn't properly compensated for here. - self.config.mem_layout.mmio()[1].len().to_mb() - 1 + self.config.chipset_high_mmio.len().to_mb() - 1 } PcatAddress::E820_ENTRY => handle_int15_e820_query( &self.config.mem_layout, @@ -453,7 +466,14 @@ impl PcatBiosDevice { ), PcatAddress::INITIAL_MEGABYTES_BELOW_GAP => { // Consumers: vmbios/source/bsp/em/smbios/smbios/Smbport.asm - self.config.mem_layout.ram_below_4gb().to_mb() + self.config + .mem_layout + .ram() + .iter() + .filter(|r| r.range.end() < 0x1_0000_0000) + .map(|r| r.range.len()) + .sum::() + .to_mb() } _ => { tracelimit::warn_ratelimited!(?addr, "unknown bios read"); diff --git a/vm/loader/src/common.rs b/vm/loader/src/common.rs index 8fa00ecfac..47486aaea0 100644 --- a/vm/loader/src/common.rs +++ b/vm/loader/src/common.rs @@ -10,6 +10,7 @@ use crate::importer::SegmentRegister; use crate::importer::TableRegister; use crate::importer::X86Register; use hvdef::HV_PAGE_SIZE; +use memory_range::MemoryRange; use std::io::Read; use std::io::Seek; use thiserror::Error; @@ -99,24 +100,17 @@ pub fn import_default_gdt( Ok(()) } -/// Returned when the MMIO layout is not supported. -#[derive(Debug, Error)] -#[error("exactly two MMIO gaps are required")] -pub struct UnsupportedMmio; - /// Computes the x86 variable MTRRs that describe the given memory layout. This /// is intended to be used to setup MTRRs for booting a guest with two mmio /// gaps, such as booting Linux, UEFI, or PCAT. -/// -/// N.B. Currently this panics if there are not exactly two MMIO ranges. pub fn compute_variable_mtrrs( memory: &MemoryLayout, physical_address_width: u8, -) -> Result, UnsupportedMmio> { + chipset_low_mmio: MemoryRange, + chipset_high_mmio: MemoryRange, +) -> Vec { const WRITEBACK: u64 = 0x6; - let &[mmio_gap_low, mmio_gap_high] = memory.mmio().try_into().map_err(|_| UnsupportedMmio)?; - // Clamp the width to something reasonable. let gpa_space_size = physical_address_width.clamp(36, 52); @@ -141,26 +135,47 @@ pub fn compute_variable_mtrrs( result.push(X86Register::MtrrPhysBase1(pcat_mtrr_size | WRITEBACK)); result.push(X86Register::MtrrPhysMask1(mtrr_mask( gpa_space_size, - mmio_gap_low.start() - 1, + chipset_low_mmio.start() - 1, ))); } // If there is more than ~3.8GB of memory, use MTRR 204 and MTRR Mask 205 to cover // the amount of memory above 4GB. - if memory.end_of_ram() > mmio_gap_low.end() { - result.push(X86Register::MtrrPhysBase2(mmio_gap_low.end() | WRITEBACK)); - result.push(X86Register::MtrrPhysMask2(mtrr_mask( - gpa_space_size, - mmio_gap_high.start() - 1, - ))); + if memory.end_of_ram() > chipset_low_mmio.end() { + result.push(X86Register::MtrrPhysBase2( + chipset_low_mmio.end() | WRITEBACK, + )); + if chipset_high_mmio.is_empty() { + // No high MMIO gap — RAM above the low gap is contiguous. + // Cover to the 8TB boundary (or GPA space limit) using the + // same split as the >64GB path below. + result.push(X86Register::MtrrPhysMask2(mtrr_mask( + gpa_space_size, + (1 << std::cmp::min(gpa_space_size, 43)) - 1, + ))); + if gpa_space_size > 43 { + result.push(X86Register::MtrrPhysBase3((1 << 43) | WRITEBACK)); + result.push(X86Register::MtrrPhysMask3(mtrr_mask( + gpa_space_size, + (1 << gpa_space_size) - 1, + ))); + } + } else { + result.push(X86Register::MtrrPhysMask2(mtrr_mask( + gpa_space_size, + chipset_high_mmio.start() - 1, + ))); + } } // If there is more memory than 64GB then use MTRR 206 and MTRR Mask 207 and possibly // MTRR 208 and MTRR Mask 209 depending on maximum address width. Both MTRR pairs are // used with the magic 8TB boundary to work around a bug in older Linux kernels // (e.g. RHEL 6.x, etc.) - if memory.end_of_ram() > mmio_gap_high.end() { - result.push(X86Register::MtrrPhysBase3(mmio_gap_high.end() | WRITEBACK)); + if !chipset_high_mmio.is_empty() && memory.end_of_ram() > chipset_high_mmio.end() { + result.push(X86Register::MtrrPhysBase3( + chipset_high_mmio.end() | WRITEBACK, + )); result.push(X86Register::MtrrPhysMask3(mtrr_mask( gpa_space_size, (1 << std::cmp::min(gpa_space_size, 43)) - 1, @@ -174,7 +189,7 @@ pub fn compute_variable_mtrrs( } } - Ok(result) + result } fn mtrr_mask(gpa_space_size: u8, maximum_address: u64) -> u64 { @@ -393,3 +408,141 @@ impl ChunkBuf { Ok(hasher.finalize()) } } + +#[cfg(test)] +mod tests { + use super::*; + + const GB: u64 = 1024 * 1024 * 1024; + const MB: u64 = 1024 * 1024; + + fn standard_low_mmio() -> MemoryRange { + // 128 MB gap below 4 GiB, matching the typical x86_64 config. + MemoryRange::new(4 * GB - 128 * MB..4 * GB) + } + + fn standard_high_mmio() -> MemoryRange { + // 512 MB immediately above 4 GiB. + MemoryRange::new(4 * GB..4 * GB + 512 * MB) + } + + fn make_layout(ram_size: u64) -> MemoryLayout { + MemoryLayout::new( + ram_size, + &[standard_low_mmio(), standard_high_mmio()], + &[], + &[], + None, + ) + .unwrap() + } + + /// Count MTRR base/mask pairs in the register list. + fn pair_count(regs: &[X86Register]) -> usize { + assert_eq!(regs.len() % 2, 0, "MTRR registers come in pairs"); + regs.len() / 2 + } + + #[test] + fn mtrr_128mb_exactly() { + // 128 MB = pcat_mtrr_size → only pair 0 (base covers 128 MB) + let layout = make_layout(128 * MB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + assert_eq!(pair_count(®s), 1); + assert_eq!(regs[0], X86Register::MtrrPhysBase0(0x6)); + } + + #[test] + fn mtrr_256mb() { + // 256 MB: pair 0 (128 MB) + pair 1 (128 MB..low_gap_start) + let layout = make_layout(256 * MB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + assert_eq!(pair_count(®s), 2); + assert_eq!(regs[0], X86Register::MtrrPhysBase0(0x6)); + // Pair 1 base = 128 MB | WRITEBACK + assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | 0x6)); + } + + #[test] + fn mtrr_2gb() { + // 2 GB: same structure as 256 MB (RAM is below 4 GiB) + let layout = make_layout(2 * GB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + assert_eq!(pair_count(®s), 2); + assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | 0x6)); + } + + #[test] + fn mtrr_8gb() { + // 8 GB: RAM above 4 GiB and above high MMIO end → pairs 0–4 + let layout = make_layout(8 * GB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + // Pair 0: 0..128 MB + // Pair 1: 128 MB..low gap start + // Pair 2: low gap end (4 GiB)..high gap start (4 GiB) — effectively empty but still emitted + // Pair 3: high gap end..8 TB boundary + // Pair 4: 8 TB.. (gpa_space_size > 43) + assert_eq!(pair_count(®s), 5); + + // Verify key base addresses + assert_eq!(regs[0], X86Register::MtrrPhysBase0(0x6)); + assert_eq!(regs[2], X86Register::MtrrPhysBase1((128 * MB) | 0x6)); + assert_eq!( + regs[4], + X86Register::MtrrPhysBase2(standard_low_mmio().end() | 0x6) + ); + assert_eq!( + regs[6], + X86Register::MtrrPhysBase3(standard_high_mmio().end() | 0x6) + ); + assert_eq!(regs[8], X86Register::MtrrPhysBase4((1u64 << 43) | 0x6)); + } + + #[test] + fn mtrr_8gb_no_high_mmio() { + // New behavior: no high MMIO gap. RAM above 4 GiB is contiguous. + // Uses the is_empty() branch. + let low = standard_low_mmio(); + let layout = MemoryLayout::new(8 * GB, &[low], &[], &[], None).unwrap(); + let regs = compute_variable_mtrrs(&layout, 46, low, MemoryRange::EMPTY); + // Pair 0: 0..128 MB + // Pair 1: 128 MB..low gap start + // Pair 2: low gap end..8 TB boundary (is_empty branch) + // Pair 3: 8 TB..GPA limit (gpa_space_size > 43) + assert_eq!(pair_count(®s), 4); + assert_eq!(regs[4], X86Register::MtrrPhysBase2(low.end() | 0x6)); + assert_eq!(regs[6], X86Register::MtrrPhysBase3((1u64 << 43) | 0x6)); + } + + #[test] + fn mtrr_narrow_address_width() { + // 40-bit address width (< 43): the 8 TB split is not needed. + let layout = make_layout(8 * GB); + let regs = compute_variable_mtrrs(&layout, 40, standard_low_mmio(), standard_high_mmio()); + // No pair 4 because gpa_space_size <= 43 + assert_eq!(pair_count(®s), 4); + assert_eq!( + regs[6], + X86Register::MtrrPhysBase3(standard_high_mmio().end() | 0x6) + ); + } + + #[test] + fn mtrr_masks_have_enabled_bit() { + // Every mask register should have the ENABLED bit (bit 11) set. + let layout = make_layout(8 * GB); + let regs = compute_variable_mtrrs(&layout, 46, standard_low_mmio(), standard_high_mmio()); + for (i, reg) in regs.iter().enumerate() { + match reg { + X86Register::MtrrPhysMask0(v) + | X86Register::MtrrPhysMask1(v) + | X86Register::MtrrPhysMask2(v) + | X86Register::MtrrPhysMask3(v) + | X86Register::MtrrPhysMask4(v) => { + assert!(v & (1 << 11) != 0, "mask at index {i} missing ENABLED bit"); + } + _ => {} // base registers + } + } + } +} diff --git a/vm/vmcore/vm_topology/src/memory.rs b/vm/vmcore/vm_topology/src/memory.rs index bf0fe05ea6..04634d7ec8 100644 --- a/vm/vmcore/vm_topology/src/memory.rs +++ b/vm/vmcore/vm_topology/src/memory.rs @@ -381,41 +381,6 @@ impl MemoryLayout { self.ram.last().expect("mmio set").range.end() } - /// The bytes of RAM below 4GB. - pub fn ram_below_4gb(&self) -> u64 { - self.ram - .iter() - .filter(|r| r.range.end() < FOUR_GB) - .map(|r| r.range.len()) - .sum() - } - - /// The bytes of RAM at or above 4GB. - pub fn ram_above_4gb(&self) -> u64 { - self.ram - .iter() - .filter(|r| r.range.end() >= FOUR_GB) - .map(|r| r.range.len()) - .sum() - } - - /// The bytes of RAM above the high MMIO gap. - /// - /// Returns None if there aren't exactly 2 MMIO gaps. - pub fn ram_above_high_mmio(&self) -> Option { - if self.mmio.len() != 2 { - return None; - } - - Some( - self.ram - .iter() - .filter(|r| r.range.start() >= self.mmio[1].end()) - .map(|r| r.range.len()) - .sum(), - ) - } - /// The ending RAM address below 4GB. /// /// Returns None if there is no RAM mapped below 4GB. diff --git a/vmm_core/src/acpi_builder.rs b/vmm_core/src/acpi_builder.rs index cc1c2642cb..5d4dbad5a8 100644 --- a/vmm_core/src/acpi_builder.rs +++ b/vmm_core/src/acpi_builder.rs @@ -754,7 +754,7 @@ impl AcpiTablesBuilder<'_, T> { /// Returns tables that should be loaded at the supplied gpa. pub fn build_acpi_tables(&self, gpa: u64, add_devices_to_dsdt: F) -> BuiltAcpiTables where - F: FnOnce(&MemoryLayout, &mut dsdt::Dsdt), + F: FnOnce(&mut dsdt::Dsdt), { let mut dsdt_data = dsdt::Dsdt::new(); // Name(\_S0, Package(2){0, 0}) @@ -768,7 +768,7 @@ impl AcpiTablesBuilder<'_, T> { &dsdt::Package(vec![0, 0]), )); // Add any chipset devices. - add_devices_to_dsdt(self.mem_layout, &mut dsdt_data); + add_devices_to_dsdt(&mut dsdt_data); // Add processor devices: // Device(P###) { Name(_HID, "ACPI0007") Name(_UID, #) Method(_STA, 0) { Return(0xF) } } for proc_index in 1..self.processor_topology.vp_count() + 1 { @@ -1306,7 +1306,7 @@ mod test { let builder = new_builder(&mem, &topology, &pcie_host_bridges); assert!(builder.build_iort().is_none()); - let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + let tables = builder.build_acpi_tables(0x100000, |_| {}); assert!(!contains_signature(&tables.tables, b"IORT")); } @@ -1335,7 +1335,7 @@ mod test { }]; let builder = new_aarch64_builder(&mem, &topology, &pcie_host_bridges); - let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + let tables = builder.build_acpi_tables(0x100000, |_| {}); assert!(contains_signature(&tables.tables, b"MCFG")); assert!(contains_signature(&tables.tables, b"IORT")); } @@ -1393,7 +1393,7 @@ mod test { }]; let builder = new_builder(&mem, &topology, &pcie_host_bridges); - let tables = builder.build_acpi_tables(0x100000, |_, _| {}); + let tables = builder.build_acpi_tables(0x100000, |_| {}); assert!(contains_signature(&tables.tables, b"CEDT")); }