Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions openvmm/membacking/src/mapping_manager/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ pub struct MappingParams {
/// that external consumers (vhost-user backends, etc.) can share the
/// backing memory.
pub dma_target: bool,
/// Host NUMA node for this mapping. `None` means OS default placement.
pub numa_node: Option<u32>,
}

struct Mappers {
Expand Down Expand Up @@ -440,6 +442,7 @@ mod tests {
file_offset: 0,
writable: true,
dma_target: true,
numa_node: None,
})
.await;

Expand All @@ -450,6 +453,7 @@ mod tests {
file_offset: 0,
writable: true,
dma_target: false,
numa_node: None,
})
.await;

Expand Down Expand Up @@ -481,6 +485,7 @@ mod tests {
file_offset: 0,
writable: true,
dma_target: false,
numa_node: None,
})
.await;

Expand Down
89 changes: 74 additions & 15 deletions openvmm/membacking/src/mapping_manager/va_mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,22 +120,63 @@ impl MapperTask {
mappable,
writable,
file_offset,
numa_node,
..
}) => {
tracing::debug!(%range, "mapping received for range");

self.inner
.mapping
.map_file(
range.start() as usize,
range.len() as usize,
&mappable,
file_offset,
writable,
)
.expect("oom mapping file");

self.wake_waiters(range, Some(writable));
let map_result = {
#[cfg(unix)]
{
self.inner.mapping.map_file(
range.start() as usize,
range.len() as usize,
&mappable,
file_offset,
writable,
)
}
#[cfg(windows)]
{
self.inner.mapping.map_file_numa(
range.start() as usize,
range.len() as usize,
&mappable,
file_offset,
writable,
numa_node,
)
}
};

match map_result {
Ok(()) => {
#[cfg(target_os = "linux")]
if let Some(node) = numa_node {
if let Err(e) = self.inner.mapping.mbind_at(
range.start() as usize,
range.len() as usize,
node,
) {
tracing::error!(
error = &e as &dyn std::error::Error,
%range,
node,
"NUMA binding failed, using default placement"
);
}
}
self.wake_waiters(range, Some(writable));
}
Err(e) => {
tracing::error!(
error = &e as &dyn std::error::Error,
%range,
"failed to map file for range"
);
self.wake_waiters(range, None);
}
}
}
MapperRequest::NoMapping(range) => {
// Wake up waiters. They'll see a failure when they try to
Expand Down Expand Up @@ -290,12 +331,30 @@ impl VaMapper {
self.process.as_ref()
}

/// Allocates private anonymous memory for a range within the mapping.
/// Allocates private anonymous memory for a range within the mapping,
/// optionally bound to a specific host NUMA node.
///
/// This replaces the placeholder at the given offset with committed
/// anonymous memory.
pub(crate) fn alloc_range(&self, offset: usize, len: usize) -> Result<(), std::io::Error> {
self.inner.mapping.alloc(offset, len)
pub(crate) fn alloc_range(
&self,
offset: usize,
len: usize,
numa_node: Option<u32>,
) -> Result<(), std::io::Error> {
#[cfg(windows)]
{
self.inner.mapping.alloc_numa(offset, len, numa_node)
}
#[cfg(unix)]
{
self.inner.mapping.alloc(offset, len)?;
#[cfg(target_os = "linux")]
if let Some(node) = numa_node {
self.inner.mapping.mbind_at(offset, len, node)?;
}
Ok(())
}
}

/// Names a range within the mapping for debugging (visible in smaps).
Expand Down
2 changes: 2 additions & 0 deletions openvmm/membacking/src/memory_manager/device_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ impl MappedMemoryRegion for DeviceMemoryRegion {
new_mapping.mappable.clone(),
new_mapping.file_offset,
new_mapping.writable,
None,
));
}
state.mappings.push(new_mapping);
Expand Down Expand Up @@ -173,6 +174,7 @@ impl MappableGuestMemory for DeviceMemoryControl {
mapping.mappable.clone(),
mapping.file_offset,
mapping.writable,
None,
)
.await;
}
Expand Down
25 changes: 24 additions & 1 deletion openvmm/membacking/src/memory_manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ struct RamBacking {
/// THP is enabled for this backing.
#[cfg_attr(not(target_os = "linux"), expect(dead_code))]
transparent_hugepages: bool,
/// Host NUMA node for this backing. `None` means OS default placement.
host_numa_node: Option<u32>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -193,6 +195,7 @@ pub struct RamBackingRequest {
hugepages: bool,
hugepage_size: Option<u64>,
existing_mappable: Option<Mappable>,
host_numa_node: Option<u32>,
}

impl RamBackingRequest {
Expand All @@ -209,6 +212,7 @@ impl RamBackingRequest {
hugepages: false,
hugepage_size: None,
existing_mappable: None,
host_numa_node: None,
}
}

Expand Down Expand Up @@ -244,6 +248,13 @@ impl RamBackingRequest {
self.existing_mappable = Some(mappable);
self
}

/// Bind this backing's memory to a specific host NUMA node
/// (Linux: `mbind(MPOL_BIND)`, Windows: `MemExtendedParameterNumaNode`).
pub fn host_numa_node(mut self, node: Option<u32>) -> Self {
self.host_numa_node = node;
self
}
Comment on lines +252 to +257
}

fn validate_hugepage_size(size: u64) -> Result<usize, MemoryBuildError> {
Expand Down Expand Up @@ -441,6 +452,7 @@ impl GuestMemoryBuilder {
ranges: req.ranges,
prefetch: req.prefetch,
transparent_hugepages: req.transparent_hugepages,
host_numa_node: req.host_numa_node,
});
continue;
}
Expand Down Expand Up @@ -479,11 +491,13 @@ impl GuestMemoryBuilder {
.into()
}
};

backings.push(RamBacking {
mappable: Some(mappable),
ranges: req.ranges,
prefetch: req.prefetch,
transparent_hugepages: false,
host_numa_node: req.host_numa_node,
});
}

Expand Down Expand Up @@ -549,11 +563,20 @@ impl GuestMemoryBuilder {
mappable.clone(),
file_offset,
true,
backing.host_numa_node,
)
.await;
// TODO: file-backed RAM mappings are established lazily
// via page faults, so NUMA binding errors are not
// caught here. Replace the lazy mapping with an eager
// push model so that errors propagate at build time.
} else {
va_mapper
.alloc_range(sub_range.start() as usize, sub_range.len() as usize)
.alloc_range(
sub_range.start() as usize,
sub_range.len() as usize,
backing.host_numa_node,
)
.map_err(|e| MemoryBuildError::PrivateRamAlloc(e, *sub_range))?;
va_mapper.set_range_name(
sub_range.start() as usize,
Expand Down
6 changes: 6 additions & 0 deletions openvmm/membacking/src/region_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ struct RegionMappingParams {
mappable: Mappable,
file_offset: u64,
writable: bool,
numa_node: Option<u32>,
}

fn range_within(outer: MemoryRange, inner: MemoryRange) -> MemoryRange {
Expand Down Expand Up @@ -589,6 +590,7 @@ impl RegionManagerTask {
file_offset: params.file_offset,
writable: params.writable,
dma_target: region.params.dma_target,
numa_node: params.numa_node,
})
.await;

Expand Down Expand Up @@ -684,6 +686,7 @@ impl RegionManagerTaskInner {
file_offset: mapping.params.file_offset,
writable: mapping.params.writable && map_params.writable,
dma_target: region.params.dma_target,
numa_node: mapping.params.numa_node,
})
.await;
}
Expand Down Expand Up @@ -917,6 +920,7 @@ impl RegionHandle {
mappable: Mappable,
file_offset: u64,
writable: bool,
numa_node: Option<u32>,
) {
let _ = self
.req_send
Expand All @@ -929,6 +933,7 @@ impl RegionHandle {
mappable,
file_offset,
writable,
numa_node,
},
),
)
Expand Down Expand Up @@ -1127,6 +1132,7 @@ mod tests {
mappable: self.mappable.clone(),
file_offset: 0,
writable: true,
numa_node: None,
},
)
.await;
Expand Down
71 changes: 71 additions & 0 deletions support/sparse_mmap/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,4 +378,75 @@ mod tests {
);
}
}

#[test]
#[cfg(any(target_os = "linux", windows))]
fn test_alloc_numa_node0() {
    // Reserve a four-page sparse region to allocate into.
    let page = SparseMapping::page_size();
    let total = page * 4;
    let sparse = SparseMapping::new(total).unwrap();

    // Commit the whole region on NUMA node 0, which this test assumes is
    // present on every host. Windows binds as part of the allocation; on
    // unix the range is allocated first and bound afterwards.
    #[cfg(windows)]
    sparse.alloc_numa(0, total, Some(0)).unwrap();
    #[cfg(unix)]
    {
        sparse.alloc(0, total).unwrap();
        sparse.mbind_at(0, total, 0).unwrap();
    }

    // Round-trip one page of data to prove the memory is readable and
    // writable after binding.
    let expected = vec![0xABu8; page];
    sparse.write_at(0, &expected).unwrap();

    let mut readback = vec![0u8; page];
    sparse.read_at(0, &mut readback).unwrap();

    assert_eq!(readback, expected);
}

#[test]
#[cfg(any(target_os = "linux", windows))]
fn test_map_file_numa_node0() {
    // Reserve a four-page sparse region and a shared-memory object of the
    // same size to map into it.
    let page = SparseMapping::page_size();
    let total = page * 4;
    let sparse = SparseMapping::new(total).unwrap();
    let backing = alloc_shared_memory(total, "test-numa").unwrap();

    // Map the object writable at offset 0, bound to NUMA node 0 (assumed by
    // this test to be present on every host). Windows binds while mapping;
    // on unix the file is mapped first and the range bound afterwards.
    #[cfg(windows)]
    sparse
        .map_file_numa(0, total, &backing, 0, true, Some(0))
        .unwrap();
    #[cfg(unix)]
    {
        sparse.map_file(0, total, &backing, 0, true).unwrap();
        sparse.mbind_at(0, total, 0).unwrap();
    }

    // Round-trip one page of data to prove the mapping is readable and
    // writable after binding.
    let expected = vec![0xCDu8; page];
    sparse.write_at(0, &expected).unwrap();

    let mut readback = vec![0u8; page];
    sparse.read_at(0, &mut readback).unwrap();

    assert_eq!(readback, expected);
}

#[test]
#[cfg(any(target_os = "linux", windows))]
fn test_alloc_numa_invalid_node() {
    // A node number far larger than any the test expects a host to have.
    // Binding to it must surface as an `Err`, never as a panic.
    const BOGUS_NODE: u32 = 99999;

    let page = SparseMapping::page_size();
    let sparse = SparseMapping::new(page).unwrap();

    #[cfg(windows)]
    {
        // On Windows the node is supplied at allocation time, so the
        // allocation itself is expected to fail.
        let result = sparse.alloc_numa(0, page, Some(BOGUS_NODE));
        assert!(result.is_err());
    }
    #[cfg(unix)]
    {
        // On unix the plain allocation must succeed; only the subsequent
        // bind is expected to be rejected.
        sparse.alloc(0, page).unwrap();
        let result = sparse.mbind_at(0, page, BOGUS_NODE);
        assert!(result.is_err());
    }
}
}
Loading
Loading