|
1 | | -# Crate Name |
| 1 | +# vm-allocator crate |
2 | 2 |
|
3 | 3 | ## Design |
| 4 | +vm-allocator is designed around resources and resource allocators, which are |
| 5 | +responsible for allocating system resources for a virtual machine (VM). |
4 | 6 |
|
5 | | -TODO: This section should have a high-level design of the crate. |
| 7 | +It provides different kinds of resources that might be used by a VM, such as |
| 8 | +memory-mapped I/O address space, port I/O address space, legacy IRQ numbers, MSI/MSI-X |
| 9 | +vectors, device instance ids, etc. All these resources should be implemented with |
| 10 | +allocation and freeing mechanisms in this crate, in order to make a VMM easier to |
| 11 | +construct. |
6 | 12 |
|
7 | | -Some questions that might help in writing this section: |
8 | | -- What is the purpose of this crate? |
9 | | -- What are the main components of the crate? How do they interact which each |
10 | | - other? |
| 13 | +The main components are listed below. |
| 14 | +- A `Resource` trait representing all kinds of resources. |
| 15 | +```rust |
| 16 | +pub trait Resource {} |
| 17 | +``` |
| 18 | + |
| 19 | +- A `ResourceSize` trait representing all kinds of resource sizes. |
| 20 | +```rust |
| 21 | +pub trait ResourceSize {} |
| 22 | +``` |
| 23 | + |
| 24 | +- A `ResourceAllocator` trait covering allocation, freeing and updating of all kinds |
| 25 | +of resources. |
| 26 | +```rust |
| 27 | +pub trait ResourceAllocator<T: Resource, S: ResourceSize> { |
| 28 | + /// Allocate some resource with given `resource` and `size`. |
| 29 | + /// |
| 30 | + /// # Arguments |
| 31 | + /// |
| 32 | + /// * `resource`: resource to be allocated. |
| 33 | + /// * `size`: resource size of allocation request. |
| 34 | + fn allocate( |
| 35 | + &mut self, |
| 36 | + resource: Option<T>, |
| 37 | + size: S, |
| 38 | + ) -> Result<T, Error>; |
| 39 | + |
| 40 | + /// Free resource specified by given `resource` and `size`. |
| 41 | + /// |
| 42 | + /// # Arguments |
| 43 | + /// |
| 44 | + /// * `resource`: resource to be freed. |
| 45 | + /// * `size`: resource size of free request. |
| 46 | + fn free(&mut self, resource: T, size: S); |
| 47 | + |
| 48 | + /// Update resource, freeing the old one and allocating a new range. |
| 49 | + /// |
| 50 | + /// This is mostly used when the guest allocates or reprograms some resources, |
| 51 | + /// so the newly allocated resources should be marked as used, and the preallocated |
| 52 | + /// ones should be freed (if any). |
| 53 | + /// |
| 54 | + /// # Arguments |
| 55 | + /// |
| 56 | + /// * `old_resource`: old resource to be freed, or `None` if there is no preallocated resource. |
| 57 | + /// * `old_size`: size of the old resource to be freed, or `None` if there is no preallocated resource. |
| 58 | + /// * `new_resource`: new resource to be allocated. |
| 59 | + /// * `new_size`: size of the new resource to be allocated. |
| 60 | + fn update(&mut self, old_resource: Option<T>, old_size: Option<S>, new_resource: T, new_size: S); |
| 61 | +} |
| 62 | +``` |
| 63 | + |
| 64 | +Different kinds of resources and their allocators should implement these three traits. Take |
| 65 | +the address space resource as an example. |
| 66 | + |
| 67 | +```rust |
| 68 | +impl Resource for GuestAddress {} |
| 69 | + |
| 70 | +// This should be implemented for u64. |
| 71 | +// Otherwise implementing for GuestUsize (aka. <GuestAddress as AddressValue>::V) |
| 72 | +// would impact other implementation such as instance id since compiler |
| 73 | +// doesn't know real type of V. |
| 74 | +impl ResourceSize for u64 {} |
| 75 | + |
| 76 | +pub struct AddressAllocator { |
| 77 | + base: GuestAddress, |
| 78 | + end: GuestAddress, |
| 79 | + alignment: GuestUsize, |
| 80 | + ranges: BTreeMap<GuestAddress, GuestUsize>, |
| 81 | +} |
| 82 | + |
| 83 | +impl ResourceAllocator<GuestAddress, GuestUsize> for AddressAllocator { |
| 84 | + /// Allocates a range of addresses from the managed region. |
| 85 | + fn allocate( |
| 86 | + &mut self, |
| 87 | + address: Option<GuestAddress>, |
| 88 | + size: GuestUsize, |
| 89 | + ) -> Result<GuestAddress> {...} |
| 90 | + |
| 91 | + /// Free an already allocated address range. |
| 92 | + fn free(&mut self, address: GuestAddress, size: GuestUsize) {...} |
| 93 | + |
| 94 | + /// Update an address range with a new one. |
| 95 | + fn update(&mut self, old_resource: Option<T>, old_size: Option<S>, new_resource: T, new_size: S) {...} |
| 96 | +``` |
| 97 | + |
| 98 | +Another kind of resource used by a VMM is the unsigned integer resource, |
| 99 | +such as IRQ numbers (including legacy IRQ numbers and MSI/MSI-X vectors) and device instance ids. |
| 100 | + |
| 101 | +```rust |
| 102 | +impl Resource for u32 {} |
| 103 | +impl ResourceSize for u32 {} |
| 104 | +pub struct IdAllocator { |
| 105 | + start: u32, |
| 106 | + end: u32, |
| 107 | + used: Vec<u32>, |
| 108 | +} |
| 109 | +impl ResourceAllocator<u32, u32> for IdAllocator {...} |
| 110 | +``` |
| 111 | + |
| 112 | +### Design Note: |
| 113 | + |
| 114 | +The VMM is responsible for system-level resource allocation, and some principles and |
| 115 | +special cases should be taken into account. |
| 116 | + |
| 117 | +- The VMM probably needs to record which kinds of resources it has, such as how many IRQ |
| 118 | +numbers and how many IO address ranges. |
| 119 | + |
| 120 | +Let the VMM design a `SystemAllocator` struct, because only the VMM knows exactly which |
| 121 | +resources it needs and whether it needs thread safety. |
| 122 | + |
| 123 | +Meanwhile, it is not recommended to put different resource instances together in something |
| 124 | +like a hash map `HashMap<String, Arc<Mutex<ResourceAllocator>>>`. This would make it |
| 125 | +harder for the allocation code to know the exact resource type and value. |
| 126 | + |
| 127 | +- The VMM probably needs a `ResourceMap` struct recording the resources belonging to each device, |
| 128 | +so that the VMM can inform the vm-allocator crate of resource map changes and do the |
| 129 | +unregistering/freeing work once the device lifetime comes to an end. |
| 130 | + |
| 131 | +```rust |
| 132 | +/// Describe a device resource set. |
| 133 | +pub struct ResourceSet { |
| 134 | + pub instance_id: u32, |
| 135 | + pub irq: u32, |
| 136 | + pub io_resources: Vec<IoResource>, |
| 137 | +} |
| 138 | + |
| 139 | +/// Describe a device resource mapping. |
| 140 | +pub struct ResourceMap { |
| 141 | + pub map: BTreeMap<Arc<Mutex<dyn Device>>, ResourceSet>, |
| 142 | +} |
| 143 | + |
| 144 | +pub struct Vmm { |
| 145 | + ... |
| 146 | + pub resource_map: ResourceMap, |
| 147 | + ... |
| 148 | +} |
| 149 | +``` |
| 150 | + |
| 151 | +- The VMM should pass some resources/allocators (e.g. the `SystemAllocator` struct) into each |
| 152 | +Vcpu thread, because the guest might change resource usage for devices, e.g. through PCI BAR |
| 153 | +reprogramming or by communicating MSI/MSI-X vectors via a write operation from the guest. |
11 | 154 |
|
12 | 155 | ## Usage |
13 | 156 |
|
14 | | -TODO: This section describes how the crate is used. |
| 157 | +This crate would be used by a VMM on demand, according to which kinds of resources it needs. |
| 158 | +For example, a simple VMM might need only IRQ numbers and device instance ids, and might not |
| 159 | +need to manage IO address space. In such a case, it can simply define the two instances as |
| 160 | +values of type `IdAllocator`. |
15 | 161 |
|
16 | | -Some questions that might help in writing this section: |
17 | | -- What traits do users need to implement? |
18 | | -- Does the crate have any default/optional features? What is each feature |
19 | | - doing? |
20 | | -- Is this crate used by other rust-vmm components? If yes, how? |
| 162 | +The VMM decides whether it needs a collection of resources, which might be called something like |
| 163 | +`SystemAllocator`. Meanwhile, thread safety is ensured by the VMM. |
| 164 | + |
| 165 | +If a VMM needs a new type of resource other than address or integer, it can either implement |
| 166 | +the `Resource`, `ResourceSize` and `ResourceAllocator` traits for its newly defined resource struct, |
| 167 | +or consider adding it to the vm-allocator crate later. |
21 | 168 |
|
22 | 169 | ## Examples |
23 | 170 |
|
24 | | -TODO: Usage examples. |
| 171 | +This demonstrates how a VMM uses the vm-allocator crate to manage resources. |
| 172 | +First, assume the VMM implements a `SystemAllocator` for collecting resources. |
| 173 | + |
| 174 | +```rust |
| 175 | +//! system_allocator.rs |
| 176 | +//! Assuming the VMM only needs three resources: instance_id, irq, mmio address space. |
| 177 | + |
| 178 | +use vm_allocator::{Resource, ResourceAllocator, ResourceSize}; |
| 179 | +use vm_allocator::{AddressAllocator, IdAllocator}; |
| 180 | + |
| 181 | +// This would be as a member of Vmm. |
| 182 | +// A Clone needs to be passed into every Vcpu thread, because when resources are changed |
| 183 | +// by guest, this needs to be changed accordingly. |
| 184 | +// |
| 185 | +// A design of Arc<Mutex<>> works but it really depends on design and allocator type. |
| 186 | +pub struct SystemAllocator { |
| 187 | + pub instance_id: Arc<Mutex<IdAllocator>>, |
| 188 | + pub irq: Arc<Mutex<IdAllocator>>, |
| 189 | + pub mmio_range: Arc<Mutex<AddressAllocator>>, |
| 190 | +} |
| 191 | + |
| 192 | +impl SystemAllocator { |
| 193 | + pub fn new(some_parameters) -> Self { |
| 194 | + } |
| 195 | + |
| 196 | + // MSI/MSI-X vectors are allocated/specified by guest driver, so the virtual interrupt |
| 197 | + // number resource needs to be updated after device receives the information, |
| 198 | + // by a BAR writing operation. |
| 199 | + pub fn update_irq(&mut self, vector: u32) -> Result<()> { |
| 200 | + // Normally, MSI/MSI-X vectors are allocated from non-used kernel interrupt resource. |
| 201 | + // Otherwise, this interrupt should not be expected by kernel. |
| 202 | + self.irq.lock().expect("failed").allocate(Some(vector), 1); |
| 203 | + } |
| 204 | + |
| 205 | + /// PCI BAR might be reprogrammed by guest kernel. |
| 206 | + pub fn update_mmio_addr(&mut self, old_addr: GuestAddress, old_size: GuestUsize, |
| 207 | + new_addr: GuestAddress, new_size: GuestUsize) -> Result<()> { |
| 208 | + // Check if old_addr exists and free it. |
| 209 | + self.mmio_range.lock().expect("failed").free(old_addr, old_size); |
| 210 | + // Check if new_addr is valid and allocate it. |
| 211 | + self.mmio_range.lock().expect("failed").allocate(new_addr, new_size); |
| 212 | + } |
| 213 | + |
| 214 | + /// Allocate an instance id for device. |
| 215 | + pub fn allocate_device_id(&mut self) -> Result<u32> {...} |
| 216 | + |
| 217 | + pub fn free_device_id(&mut self, id: u32) {...} |
| 218 | + |
| 219 | + /// Allocate an IRQ number. |
| 220 | + /// |
| 221 | + /// * `irq`: specify a specific number or any value. |
| 222 | + pub fn allocate_irq(&mut self, irq: Option<u32>) -> Result<u32> {...} |
| 223 | + |
| 224 | + pub fn free_irq(&mut self, id: u32) {...} |
| 225 | + |
| 226 | + /// Allocate mmio address and size. |
| 227 | + pub fn allocate_mmio_addr(&mut self, addr: Option<GuestAddress>, size: GuestUsize) -> Result<GuestAddress> {...} |
| 228 | + |
| 229 | + pub fn free_mmio_addr(&mut self, addr: GuestAddress, size: GuestUsize) {...} |
| 230 | + ... |
| 231 | +} |
| 232 | +``` |
| 233 | + |
| 234 | +The VMM initialization workflow is as follows. |
25 | 235 |
|
26 | 236 | ```rust |
27 | | -use my_crate; |
| 237 | +use vm_allocator::{Resource, ResourceAllocator, ResourceSize}; |
| 238 | +use vm_device::DeviceManager; |
28 | 239 |
|
| 240 | +let vmm = Vmm::new(); |
| 241 | +// Other initialization related to the vmm |
29 | 242 | ... |
| 243 | +// Initialize the SystemAllocator and add Resource Allocator to it. |
| 244 | +let sys_alloc = SystemAllocator::new(some_parameters); |
| 245 | + |
| 246 | +// Initialize the DeviceManager. |
| 247 | +let dev_mgr = DeviceManager::new(); |
| 248 | + |
| 249 | +// Initialize a PCI device. |
| 250 | +let pci_dev = Arc::new(Mutex::new(DummyPciDevice::new())); |
| 251 | + |
| 252 | +// Allocate IRQ, instance id, MMIO space for the dummy pci device. |
| 253 | +let id = sys_alloc.allocate_instance_id().unwrap(); |
| 254 | +let irq = sys_alloc.allocate_irq(None).unwrap(); |
| 255 | +let addr = sys_alloc.allocate_mmio_addr(None, 0x100); |
| 256 | + |
| 257 | +// The VMM needs a structure to store all the resources mapping to device instance |
| 258 | +// used for unregistering and resource freeing. |
| 259 | +vmm.resource_map.insert(ResourceSet{id, irq, Range{addr, 0x100}}, pci_dev.clone()); |
| 260 | + |
| 261 | +// Insert the device into DeviceManager with preallocated resources. |
| 262 | +dev_mgr.register_device(pci_dev, id, Some(irq), vec![addr]); |
| 263 | + |
| 264 | +// Other operation for vmm. |
| 265 | +... |
| 266 | + |
| 267 | +// Unregister this device like unhotplug case. |
| 268 | + |
| 269 | +// Firstly, get all the resources belong to the unplugged device. |
| 270 | +let set = vmm.resource_map.get(pci_dev.clone()); |
| 271 | + |
| 272 | +// Unregister this device from DeviceManager. |
| 273 | +dev_mgr.unregister_device(set.id); |
| 274 | + |
| 275 | +// Free resources of this device. |
| 276 | +sys_alloc.free_device_id(set.id); |
| 277 | +sys_alloc.free_irq(set.irq); |
| 278 | +sys_alloc.free_mmio_addr(set.addr, set.size); |
| 279 | + |
| 280 | +``` |
| 281 | + |
| 282 | +Some resource changes might happen while a vcpu is running. Take the MSI vector as an example. |
| 283 | +```rust |
| 284 | +pub struct Vcpu { |
| 285 | + fd: VcpuFd, |
| 286 | + id: u8, |
| 287 | + device_manager: Arc<Mutex<DeviceManager>>, |
| 288 | + system_allocator: SystemAllocator, |
| 289 | +} |
| 290 | + |
| 291 | +pub enum ResourceChange { |
| 292 | + NoChange, |
| 293 | + IrqOccupying(irq), |
| 294 | + IoReprogramming(addr, size), |
| 295 | +} |
| 296 | + |
| 297 | +impl Vcpu { |
| 298 | + // Runs the VCPU until it exits, returning the reason. |
| 299 | + pub fn run(&self) -> Result<()> { |
| 300 | + match self.fd.run() { |
| 301 | + VcpuExit::MmioWrite(addr, data) => { |
| 302 | + // Guest write MSI vector information to device BAR. |
| 303 | + match self.device_manager.mmio_bus.write(GuestAddress{addr as u64}, data) { |
| 304 | + // A MSI vector set is written back from guest |
| 305 | + // Check if there is resource changing. |
| 306 | + ResourceChange::IrqOccupying(irq) => { |
| 307 | + // Call vm_allocator to update the irq resource. |
| 308 | + self.system_allocator.update_irq(irq).map_err(); |
| 309 | + // Call vm_device to update. |
| 310 | + self.device_manager.update_irq(irq).map_err(); |
| 311 | + }, |
| 312 | + // PCI BAR is reprogrammed by guest. |
| 313 | + ResourceChange::IoReprogramming(addr, size) => { |
| 314 | + // Call vm_allocator to update the io resource. |
| 315 | + self.system_allocator.update_mmio_addr(addr, size).map_err(); |
| 316 | + // Call vm_device to update. |
| 317 | + self.device_manager.update_mmio_addr(addr, size).map_err(); |
| 318 | + }, |
| 319 | + ResourceChange::NoChange => Ok(()), |
| 320 | + } |
| 321 | + } |
| 322 | + ... |
| 323 | + } |
| 324 | + } |
| 325 | +} |
| 326 | + |
30 | 327 | ``` |
31 | 328 |
|
| 329 | + |
32 | 330 | ## License |
33 | 331 |
|
34 | 332 | **!!!NOTICE**: The BSD-3-Clause license is not included in this template. |
|
0 commit comments