diff --git a/.github/workflows/flake-bump-auto-approve.yaml b/.github/workflows/flake-bump-auto-approve.yaml new file mode 100644 index 0000000000..4bc796980c --- /dev/null +++ b/.github/workflows/flake-bump-auto-approve.yaml @@ -0,0 +1,72 @@ +name: Flake bump auto approve +on: + pull_request: + paths: + - 'flake.lock' + branches: + - gardenlinux + +jobs: + gitlint: + name: Flake bump auto approve + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + - name: Set up Python 3.11 + uses: actions/setup-python@v6 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade gitlint + # this rule checks the prerequisits and write the exit code in its output + - name: Lint git commit messages + id: gitlint + run: | + set +e + gitlint --commits origin/$GITHUB_BASE_REF.. -C .gitlint_auto_approve + code=$? + if [ $code -eq 0 ]; then + echo "this merge request is eligible for a flake bump auto approve and merge" + else + echo "this merge request will not be automatically approved." + fi + echo "exit_code=$code" >> "$GITHUB_OUTPUT" + exit 0 + # the following steps only run if gitlint run successful + - name: Create variables + if: steps.gitlint.outputs.exit_code == '0' + id: create_variable + run: | + REPO=$(echo ${GITHUB_REPOSITORY} | cut -f 2 -d '/') + OWNER=$(echo ${GITHUB_REPOSITORY} | cut -f 1 -d '/') + echo "repo=$REPO" >> "$GITHUB_OUTPUT" + echo "owner=$OWNER" >> "$GITHUB_OUTPUT" + - name: Generate token + if: steps.gitlint.outputs.exit_code == '0' && steps.create_variable.outputs.repo != '' + id: generate_token + uses: actions/create-github-app-token@v2 + with: + app-id: ${{ secrets.GH_AUTO_APPROVE_APP_ID }} + private-key: ${{ secrets.GH_AUTO_APPROVE_APP_PRIVATE_KEY }} + owner: ${{ steps.create_variable.outputs.owner }} + repositories: ${{ steps.create_variable.outputs.repo }} + - name: Merge Pull request + if: steps.gitlint.outputs.exit_code == '0' && steps.generate_token.outputs.token != '' + shell: bash + env: + GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }} + run: | + # GitHub CLI api + # https://cli.github.com/manual/gh_api + gh api \ + --method PUT \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2026-03-10" \ + /repos/${GITHUB_REPOSITORY}/pulls/${{ github.event.number }}/merge \ + -f 'merge_method=rebase' diff --git a/.gitlint_auto_approve b/.gitlint_auto_approve new file mode 100644 index 0000000000..d79e9b21f5 --- /dev/null +++ b/.gitlint_auto_approve @@ -0,0 +1,15 @@ +[general] +extra-path=ci/gitlint/rules_auto_approve +regex-style-search=true +ignore=body-is-missing,body-max-line-length + +# default 72 +[title-max-length] +line-length=72 + +# Empty bodies are fine +[body-min-length] +min-length=0 + +[UC-flake] +filepath=flake.lock diff --git a/Cargo.lock b/Cargo.lock index dd3501a2ac..2aa24483b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -512,7 +512,7 @@ checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cloud-hypervisor" -version = "51.1.0" +version = "51.2.0" dependencies = [ "anyhow", "api_client", diff --git a/block/src/aligned_operation.rs b/block/src/aligned_operation.rs new file mode 100644 index 0000000000..3081b7b705 --- /dev/null +++ b/block/src/aligned_operation.rs @@ -0,0 +1,90 @@ +// Copyright (c) 2026 Meta Platforms, Inc. and affiliates. +// +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause + +use std::alloc::{Layout, alloc_zeroed, dealloc}; +use std::io; + +use vm_memory::GuestAddress; + +/// Owns an aligned bounce buffer used when a guest descriptor's host VA +/// does not meet the disk backend's alignment requirement. +#[derive(Debug)] +pub struct AlignedOperation { + data_addr: GuestAddress, + aligned_ptr: *mut u8, + size: usize, + layout: Layout, +} + +impl AlignedOperation { + /// Allocate a zero-initialized buffer of `size` bytes aligned to + /// `alignment`. Returns `InvalidInput` if `size` is zero; + /// `alignment` must be a power of two and not exceed `isize::MAX` + /// after rounding up. + pub fn new(data_addr: GuestAddress, size: usize, alignment: usize) -> io::Result { + if size == 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "AlignedOperation requires a non-zero size", + )); + } + let layout = Layout::from_size_align(size, alignment) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + // SAFETY: size is non-zero (checked above) and Layout::from_size_align + // rejects alignments that are not a power of two or that overflow. + let aligned_ptr = unsafe { alloc_zeroed(layout) }; + if aligned_ptr.is_null() { + return Err(io::Error::last_os_error()); + } + Ok(Self { + data_addr, + aligned_ptr, + size, + layout, + }) + } + + /// Gets the raw pointer to the aligned buffer. + pub fn as_mut_ptr(&mut self) -> *mut u8 { + self.aligned_ptr + } + + /// Returns the aligned buffer as a slice. + pub fn as_bytes(&self) -> &[u8] { + // SAFETY: `new` allocates `size` bytes via alloc_zeroed (so they + // are initialized) and AlignedOperation owns the buffer + // exclusively. + unsafe { std::slice::from_raw_parts(self.aligned_ptr, self.size) } + } + + /// Returns the aligned buffer as a mutable slice. + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + // SAFETY: same invariant as as_bytes; &mut self rules out other + // simultaneous borrows. + unsafe { std::slice::from_raw_parts_mut(self.aligned_ptr, self.size) } + } + + /// Returns the guest address for this op. + pub fn data_addr(&self) -> GuestAddress { + self.data_addr + } +} + +impl Drop for AlignedOperation { + fn drop(&mut self) { + // SAFETY: `new` is the only constructor, and it stores a pointer + // returned by `alloc_zeroed` paired with the exact `layout` used + // for that allocation. Ownership has not escaped (the type is + // neither `Clone` nor `Copy`). + unsafe { + dealloc(self.aligned_ptr, self.layout); + } + } +} + +// SAFETY: AlignedOperation owns its heap allocation exclusively (no Clone/ +// Copy, no shared aliases) and the allocation's lifetime is tied to the +// value's. Moving an AlignedOperation between threads transfers that +// ownership; the same rationale Box uses for its Send impl. +unsafe impl Send for AlignedOperation {} diff --git a/block/src/lib.rs b/block/src/lib.rs index 9f78cefd9e..504f4cc3c2 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -8,6 +8,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +mod aligned_operation; pub mod async_io; pub mod fcntl; pub mod fixed_vhd; @@ -28,7 +29,7 @@ pub mod vhd; pub mod vhdx; pub mod vhdx_sync; -use std::alloc::{Layout, alloc_zeroed, dealloc}; +use std::alloc::{Layout, alloc_zeroed}; use std::collections::VecDeque; use std::fmt::{self, Debug}; use std::fs::File; @@ -40,6 +41,7 @@ use std::str::FromStr; use std::time::Instant; use std::{cmp, result}; +pub use aligned_operation::AlignedOperation; #[cfg(feature = "io_uring")] use io_uring::{IoUring, Probe, opcode}; use libc::{S_IFBLK, S_IFMT, ioctl}; @@ -232,14 +234,6 @@ fn sector( const DEFAULT_DESCRIPTOR_VEC_SIZE: usize = 32; -#[derive(Debug)] -pub struct AlignedOperation { - origin_ptr: u64, - aligned_ptr: u64, - size: usize, - layout: Layout, -} - pub struct BatchRequest { pub offset: libc::off_t, pub iovecs: SmallVec<[libc::iovec; DEFAULT_DESCRIPTOR_VEC_SIZE]>, @@ -473,31 +467,19 @@ impl Request { let iov_base = if (origin_ptr.as_ptr() as u64).is_multiple_of(SECTOR_SIZE) { origin_ptr.as_ptr() as *mut libc::c_void } else { - let layout = Layout::from_size_align(data_len, SECTOR_SIZE as usize).unwrap(); - // SAFETY: layout has non-zero size - let aligned_ptr = unsafe { alloc_zeroed(layout) }; - if aligned_ptr.is_null() { - return Err(ExecuteError::TemporaryBufferAllocation( - io::Error::last_os_error(), - )); - } + let mut aligned_op = + AlignedOperation::new(data_addr, data_len, SECTOR_SIZE as usize) + .map_err(ExecuteError::TemporaryBufferAllocation)?; // We need to perform the copy beforehand in case we're writing // data out. if request_type == RequestType::Out { - // SAFETY: destination buffer has been allocated with - // the proper size. - unsafe { std::ptr::copy(origin_ptr.as_ptr(), aligned_ptr, data_len) }; + mem.read_slice(aligned_op.as_bytes_mut(), data_addr) + .map_err(ExecuteError::Read)?; } - // Store both origin and aligned pointers for complete_async() - // to process them. - self.aligned_operations.push(AlignedOperation { - origin_ptr: origin_ptr.as_ptr() as u64, - aligned_ptr: aligned_ptr as u64, - size: data_len, - layout, - }); + let aligned_ptr = aligned_op.as_mut_ptr(); + self.aligned_operations.push(aligned_op); aligned_ptr as *mut libc::c_void }; @@ -639,31 +621,17 @@ impl Request { Ok(ret) } - pub fn complete_async(&mut self) -> result::Result<(), Error> { - for aligned_operation in self.aligned_operations.drain(..) { + pub fn complete_async( + &mut self, + mem: &vm_memory::GuestMemoryMmap, + ) -> result::Result<(), Error> { + for aligned_op in self.aligned_operations.drain(..) { // We need to perform the copy after the data has been read inside // the aligned buffer in case we're reading data in. if self.request_type == RequestType::In { - // SAFETY: origin buffer has been allocated with the - // proper size. - unsafe { - std::ptr::copy( - aligned_operation.aligned_ptr as *const u8, - aligned_operation.origin_ptr as *mut u8, - aligned_operation.size, - ); - }; + mem.write_slice(aligned_op.as_bytes(), aligned_op.data_addr()) + .map_err(Error::GuestMemory)?; } - - // Free the temporary aligned buffer. - // SAFETY: aligned_ptr was allocated by alloc_zeroed with the same - // layout - unsafe { - dealloc( - aligned_operation.aligned_ptr as *mut u8, - aligned_operation.layout, - ); - }; } Ok(()) diff --git a/block/src/raw_async.rs b/block/src/raw_async.rs index 539aaa9095..45fb86d1a1 100644 --- a/block/src/raw_async.rs +++ b/block/src/raw_async.rs @@ -197,6 +197,14 @@ impl AsyncIo for RawFileAsync { let (submitter, mut sq, _) = self.io_uring.split(); let mut submitted = false; + // Refuse the whole batch if it can't fit in the SQ to avoid having to unroll a partially + // successful push. + if batch_request.len() > sq.capacity() - sq.len() { + return Err(AsyncIoError::SubmitBatchRequests(Error::other( + "io_uring submission queue is full", + ))); + } + for req in batch_request { match req.request_type { RequestType::In => { diff --git a/ci/README.auto.approve.md b/ci/README.auto.approve.md new file mode 100644 index 0000000000..a38d3e6e7e --- /dev/null +++ b/ci/README.auto.approve.md @@ -0,0 +1,43 @@ +# Flake bump auto approve + +## Description + +We add a github workflow `Flake bump`. +First job of this workflow checks if a merge request contains only one commit which updates the `flake.lock` file. +If this condition is met the second job approve this merge request and automatically merge it. +The approval is done with a dedicated GitHubApp. + +## Install + +* Follow this guide: https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app +* Create a GitHub app `auto-approve-app` in your GH organization + * github.com/github-organization/ -> Settings -> Developer Settings -> GitHub Apps -> New GitHub App + * Add a name and Homepage URL + * Add Repository Permissions + * Actions: RO + * Contents: RW + * Metadata: RO + * Pull Requests: RW + * Workflows: RW + +* Install this app into your organization + * github.com/github-organization/ -> Settings -> Developer Settings -> GitHub Apps -> Select `auto-approve-app` -> Install App + * Only select repositories: + * repository-name + +* Find app_id + * github.com/github-organization/ -> Settings -> Developer Settings -> GitHub Apps -> Select `auto-approve-app` + * you find the app_id in the `General` section + +* Create app client secret + * github.com/github-organization/ -> Settings -> Developer Settings -> GitHub Apps -> Select `auto-approve-app` -> Client secrets + * The private key will be downloaded using your browser + * Save it in 1Password or vault + +* Create two organization secrets: + * GH_AUTO_APPROVE_APP_ID + * GH_AUTO_APPROVE_APP_PRIVATE_KEY + +* Add Github App `auto-approve-app` to your branch ruleset. + * github.com/github-organization/repository -> Settings -> Rules -> Rulesets -> rule name -> Bypass list -> Add bypass + * This allows the Github App `auto-approve-app` to merge the MRs even if other conditions of the ruleset are not met. diff --git a/ci/gitlint/rules_auto_approve/only-flake.py b/ci/gitlint/rules_auto_approve/only-flake.py new file mode 100644 index 0000000000..567323869f --- /dev/null +++ b/ci/gitlint/rules_auto_approve/only-flake.py @@ -0,0 +1,61 @@ +# Copyright © 2026 Cyberus Technology GmbH +# +# SPDX-License-Identifier: Apache-2.0 +# +from gitlint.options import ListOption, StrOption +from gitlint.rules import CommitRule, RuleViolation + +class SingleSpecificFile(CommitRule): + """Reject commits which modifies files other than those specified""" + id = "UC-flake" + name = "body-require-single-specific-file" + description = "Commit must change exactly one specific file" + target = None # Applies to entire commit + options_spec = [ + StrOption( + "filepath", + "flake.lock", + "The file path to check" + ) + ] + + def validate(self, commit): + changed_files = getattr(commit, "changed_files", None) + if changed_files is None: + # Newer gitlint commit objects expose the touched paths directly via + # `changed_files`. Older variants may only expose + # `changed_files_stats`, a mapping keyed by changed path, so we fall + # back to its keys when `changed_files` is unavailable. + changed_files_stats = getattr(commit, "changed_files_stats", {}) + changed_files = list(changed_files_stats.keys()) + + if len(changed_files) != 1: + return [RuleViolation("commit-changes-multiple-files-or-none", f"Commit changes {len(changed_files)} files, expected exactly 1: {', '.join(changed_files)}")] + + filepath = self.options["filepath"].value + if changed_files[0] != filepath: + return [RuleViolation("commit-wrong-file", f"Commit changes '{changed_files[0]}', expected only '{filepath}'")] + +#################### +# Usage of this rule +#################### +# +# .gitlint_auto_approve file +# [general] +# extra-path=ci/gitlint/rules_auto_approve +# regex-style-search=true +# ignore=body-is-missing,body-max-line-length + +# # default 72 +# [title-max-length] +# line-length=72 + +# # Empty bodies are fine +# [body-min-length] +# min-length=0 + +# [UC-flake] +# filepath=flake.lock + +## run with +# nix run nixpkgs#gitlint -- --commits origin/main.. -C .gitlint_auto_approve diff --git a/cloud-hypervisor/Cargo.toml b/cloud-hypervisor/Cargo.toml index ebb7c8d95d..39b4d1b871 100644 --- a/cloud-hypervisor/Cargo.toml +++ b/cloud-hypervisor/Cargo.toml @@ -7,7 +7,7 @@ edition = "2024" homepage = "https://github.com/cloud-hypervisor/cloud-hypervisor" license = "Apache-2.0 AND BSD-3-Clause" name = "cloud-hypervisor" -version = "51.1.0" +version = "51.2.0" # Minimum buildable version: # Keep in sync with version in .github/workflows/build.yaml # Policy on MSRV (see #4318): diff --git a/fuzz/fuzz_targets/balloon.rs b/fuzz/fuzz_targets/balloon.rs index b745cf6187..58b9b30582 100644 --- a/fuzz/fuzz_targets/balloon.rs +++ b/fuzz/fuzz_targets/balloon.rs @@ -95,15 +95,16 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { reporting_queue_evt.write(1).unwrap(); balloon - .activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![ + .activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![ (0, inflate_q, inflate_evt), (1, deflate_q, deflate_evt), (2, reporting_q, reporting_evt), ], - ) + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and balloon device worker thread to return diff --git a/fuzz/fuzz_targets/block.rs b/fuzz/fuzz_targets/block.rs index 7d1fbdf38f..12a4dcc726 100644 --- a/fuzz/fuzz_targets/block.rs +++ b/fuzz/fuzz_targets/block.rs @@ -89,11 +89,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { queue_evt.write(1).unwrap(); block - .activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, q, evt)], - ) + .activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, q, evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and block device worker thread to return diff --git a/fuzz/fuzz_targets/console.rs b/fuzz/fuzz_targets/console.rs index 4b3a49df91..e27331ed01 100644 --- a/fuzz/fuzz_targets/console.rs +++ b/fuzz/fuzz_targets/console.rs @@ -128,11 +128,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { pipe_tx.write_all(console_input_bytes).unwrap(); // To use fuzzed data; console - .activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, input_queue, input_evt), (1, output_queue, output_evt)], - ) + .activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, input_queue, input_evt), (1, output_queue, output_evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .unwrap(); // Wait for the events to finish and console device worker thread to return diff --git a/fuzz/fuzz_targets/iommu.rs b/fuzz/fuzz_targets/iommu.rs index 8c9f26b262..a10640487f 100644 --- a/fuzz/fuzz_targets/iommu.rs +++ b/fuzz/fuzz_targets/iommu.rs @@ -107,14 +107,15 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { request_queue_evt.write(1).unwrap(); iommu - .activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![ + .activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![ (0, request_queue, request_evt), (0, _event_queue, _event_evt), ], - ) + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and vIOMMU device worker thread to return diff --git a/fuzz/fuzz_targets/mem.rs b/fuzz/fuzz_targets/mem.rs index 57fc9a91dd..73ec11b025 100644 --- a/fuzz/fuzz_targets/mem.rs +++ b/fuzz/fuzz_targets/mem.rs @@ -105,11 +105,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { queue_evt.write(1).unwrap(); virtio_mem - .activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, q, evt)], - ) + .activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, q, evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and virtio-mem device worker thread to return diff --git a/fuzz/fuzz_targets/net.rs b/fuzz/fuzz_targets/net.rs index 30968d2a47..df9a1dce5a 100644 --- a/fuzz/fuzz_targets/net.rs +++ b/fuzz/fuzz_targets/net.rs @@ -143,11 +143,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { input_queue_evt.write(1).unwrap(); output_queue_evt.write(1).unwrap(); - net.activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, input_queue, input_evt), (1, output_queue, output_evt)], - ) + net.activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, input_queue, input_evt), (1, output_queue, output_evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .unwrap(); // Wait for the events to finish and net device worker thread to return diff --git a/fuzz/fuzz_targets/pmem.rs b/fuzz/fuzz_targets/pmem.rs index a8fcb7a774..0bd083a1c2 100644 --- a/fuzz/fuzz_targets/pmem.rs +++ b/fuzz/fuzz_targets/pmem.rs @@ -61,11 +61,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { // Kick the 'queue' event before activate the pmem device queue_evt.write(1).unwrap(); - pmem.activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, q, evt)], - ) + pmem.activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, q, evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and pmem device worker thread to return diff --git a/fuzz/fuzz_targets/rng.rs b/fuzz/fuzz_targets/rng.rs index 8d5ffe35b3..13548664a8 100644 --- a/fuzz/fuzz_targets/rng.rs +++ b/fuzz/fuzz_targets/rng.rs @@ -99,11 +99,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { // Kick the 'queue' event before activate the rng device queue_evt.write(1).unwrap(); - rng.activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, q, evt)], - ) + rng.activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, q, evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and rng device worker thread to return diff --git a/fuzz/fuzz_targets/vsock.rs b/fuzz/fuzz_targets/vsock.rs index 144b8b4057..33ebe78886 100644 --- a/fuzz/fuzz_targets/vsock.rs +++ b/fuzz/fuzz_targets/vsock.rs @@ -108,11 +108,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { .unwrap(); vsock - .activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, q, evt)], - ) + .activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, q, evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and vsock device worker thread to return diff --git a/fuzz/fuzz_targets/watchdog.rs b/fuzz/fuzz_targets/watchdog.rs index f203a228f9..31361755df 100644 --- a/fuzz/fuzz_targets/watchdog.rs +++ b/fuzz/fuzz_targets/watchdog.rs @@ -64,11 +64,12 @@ fuzz_target!(|bytes: &[u8]| -> Corpus { queue_evt.write(1).unwrap(); watchdog - .activate( - guest_memory, - Arc::new(NoopVirtioInterrupt {}), - vec![(0, q, evt)], - ) + .activate(virtio_devices::ActivationContext { + mem: guest_memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![(0, q, evt)], + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .ok(); // Wait for the events to finish and watchdog device worker thread to return diff --git a/net_util/src/queue_pair.rs b/net_util/src/queue_pair.rs index 86a1c758dc..c0b8825e71 100644 --- a/net_util/src/queue_pair.rs +++ b/net_util/src/queue_pair.rs @@ -51,7 +51,13 @@ impl TxVirtio { let mut retry_write = false; let mut rate_limit_reached = false; - while let Some(mut desc_chain) = queue.pop_descriptor_chain(mem) { + loop { + let mut iter = queue + .iter(mem) + .map_err(NetQueuePairError::QueueIteratorFailed)?; + let Some(mut desc_chain) = iter.next() else { + break; + }; if rate_limit_reached { queue.go_to_previous_position(); break; @@ -180,7 +186,13 @@ impl RxVirtio { let mut exhausted_descs = true; let mut rate_limit_reached = false; - while let Some(mut desc_chain) = queue.pop_descriptor_chain(mem) { + loop { + let mut iter = queue + .iter(mem) + .map_err(NetQueuePairError::QueueIteratorFailed)?; + let Some(mut desc_chain) = iter.next() else { + break; + }; if rate_limit_reached { exhausted_descs = false; queue.go_to_previous_position(); diff --git a/release-notes.md b/release-notes.md index 1f2ff74152..225039cbcf 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,3 +1,4 @@ +- [v51.2](#v512) - [v51.1](#v511) - [v51.0](#v510) - [Security Fixes](#security-fixes) @@ -419,6 +420,12 @@ - [Unit testing](#unit-testing) - [Integration tests parallelization](#integration-tests-parallelization) +# v51.2 + +This is a point release containing security fixes to a use-after-free +vulnerability in the `virtio-block` async I/O completion path +(#8220). Details can be found in GHSA-f47p-p25q-83rh (CVE-2026-45782). + # v51.1 This is a bug fix release. The following issues have been addressed: diff --git a/virtio-devices/src/balloon.rs b/virtio-devices/src/balloon.rs index 3db6832617..bb9c46cbc8 100644 --- a/virtio-devices/src/balloon.rs +++ b/virtio-devices/src/balloon.rs @@ -590,12 +590,13 @@ impl VirtioDevice for Balloon { } } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; let (kill_evt, pause_evt) = self.common.dup_eventfds(); diff --git a/virtio-devices/src/block.rs b/virtio-devices/src/block.rs index 805a7c6155..c035d571f8 100644 --- a/virtio-devices/src/block.rs +++ b/virtio-devices/src/block.rs @@ -13,7 +13,7 @@ use std::num::Wrapping; use std::ops::Deref; use std::os::unix::io::AsRawFd; use std::path::PathBuf; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; use std::sync::{Arc, Barrier}; use std::{io, result}; @@ -161,6 +161,7 @@ struct BlockEpollHandler { host_cpus: Option>, acked_features: u64, disable_sector0_writes: bool, + device_status: Arc, } fn has_feature(features: u64, feature_flag: u64) -> bool { @@ -168,6 +169,10 @@ fn has_feature(features: u64, feature_flag: u64) -> bool { } impl BlockEpollHandler { + fn needs_reset(&self) -> bool { + (self.device_status.load(Ordering::Acquire) & crate::DEVICE_NEEDS_RESET as u8) != 0 + } + fn check_request( features: u64, request: &Request, @@ -192,12 +197,71 @@ impl BlockEpollHandler { Ok(()) } + fn handle_queue_iterator_error(&mut self, err: &virtio_queue::Error) { + // The guest submitted a corrupted VirtQ request, and the error + // was logged during queue processing. We cannot just ignore the + // error, as the guest could continue spamming the VMM with bad + // requests, triggering excessive error logging. So we mark + // the device "NEEDS_RESET", effectively stopping all request + // processing (see self.needs_reset() usage) until the guest + // resets and reactivates the device. + + warn!( + "Corrupted request detected (virtqueue error: {err:?}). \ +Setting device status to 'NEEDS_RESET' and stopping processing queues until reset." + ); + + self.set_needs_reset(); + } + + fn set_needs_reset(&mut self) { + self.device_status + .fetch_or(crate::DEVICE_NEEDS_RESET as u8, Ordering::SeqCst); + + // Let the guest know that the device status has changed. + if let Err(e) = self.interrupt_cb.trigger(VirtioInterruptType::Config) { + error!("Failed to signal config interrupt: {e:?}"); + } + } + + // A spec-compliant driver never reuses a virtqueue head_index while the + // corresponding chain is still available (virtio 1.x §2.7.13.4). + // Double check the guest driver is behaving. + fn is_head_in_flight( + inflight: &VecDeque<(u16, Request)>, + batch: &[(u16, Request)], + head: u16, + ) -> bool { + batch.iter().any(|(h, _)| *h == head) || inflight.iter().any(|(h, _)| *h == head) + } + fn process_queue_submit(&mut self) -> Result<()> { + if self.needs_reset() { + return Ok(()); + } let queue = &mut self.queue; let mut batch_requests = Vec::new(); let mut batch_inflight_requests = Vec::new(); - while let Some(mut desc_chain) = queue.pop_descriptor_chain(self.mem.memory()) { + loop { + let mut desc_chain = match queue.iter(self.mem.memory()) { + Ok(mut iter) => match iter.next() { + Some(c) => c, + None => break, + }, + Err(err) => { + self.handle_queue_iterator_error(&err); + return Ok(()); + } + }; + + let head = desc_chain.head_index(); + if Self::is_head_in_flight(&self.inflight_requests, &batch_inflight_requests, head) { + warn!("Guest reused virtio-blk head_index {head} while the chain was used"); + self.set_needs_reset(); + return Ok(()); + } + let mut request = Request::parse(&mut desc_chain, self.access_platform.as_deref()) .map_err(Error::RequestParsing)?; @@ -282,8 +346,11 @@ impl BlockEpollHandler { ) } } + batch_inflight_requests.push((desc_chain.head_index(), request)); + } else { + self.inflight_requests + .push_back((desc_chain.head_index(), request)); } - batch_inflight_requests.push((desc_chain.head_index(), request)); } else { let status = match result { Ok(_) => VIRTIO_BLK_S_OK, @@ -380,6 +447,9 @@ impl BlockEpollHandler { } fn process_queue_complete(&mut self) -> Result<()> { + if self.needs_reset() { + return Ok(()); + } let mem = self.mem.memory(); let mut read_bytes = Wrapping(0); let mut write_bytes = Wrapping(0); @@ -391,7 +461,9 @@ impl BlockEpollHandler { let mut request = self.find_inflight_request(desc_index)?; - request.complete_async().map_err(Error::RequestCompleting)?; + request + .complete_async(&mem) + .map_err(Error::RequestCompleting)?; let latency = request.start.elapsed().as_micros() as u64; let read_ops_last = self.counters.read_ops.load(Ordering::Relaxed); @@ -662,6 +734,7 @@ pub struct Block { serial: Vec, queue_affinity: BTreeMap>, disable_sector0_writes: bool, + device_status: Arc, } #[derive(Serialize, Deserialize)] @@ -820,6 +893,7 @@ impl Block { serial, queue_affinity, disable_sector0_writes, + device_status: Arc::new(AtomicU8::new(0)), }) } @@ -1011,12 +1085,14 @@ impl VirtioDevice for Block { self.update_writeback(); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + device_status, + } = context; + self.device_status = device_status; // See if the guest didn't ack the device being read-only. // If so, warn and pretend it did. let original_acked_features = self.common.acked_features; @@ -1078,6 +1154,7 @@ impl VirtioDevice for Block { host_cpus: self.queue_affinity.get(&queue_idx).cloned(), acked_features: self.common.acked_features, disable_sector0_writes: self.disable_sector0_writes, + device_status: self.device_status.clone(), }; let paused = self.common.paused.clone(); diff --git a/virtio-devices/src/console.rs b/virtio-devices/src/console.rs index c8a9f08a02..760864b3a5 100644 --- a/virtio-devices/src/console.rs +++ b/virtio-devices/src/console.rs @@ -703,12 +703,13 @@ impl VirtioDevice for Console { self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; self.resizer .acked_features diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs index 8289cdf116..90958ecddc 100644 --- a/virtio-devices/src/device.rs +++ b/virtio-devices/src/device.rs @@ -9,7 +9,7 @@ use std::collections::HashMap; use std::io::Write; use std::num::Wrapping; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; use std::sync::{Arc, Barrier}; use std::thread; @@ -54,6 +54,13 @@ pub struct VirtioSharedMemoryList { pub region_list: Vec, } +pub struct ActivationContext { + pub mem: GuestMemoryAtomic, + pub interrupt_cb: Arc, + pub queues: Vec<(usize, Queue, EventFd)>, + pub device_status: Arc, +} + /// Trait for virtio devices to be driven by a virtio transport. /// /// The lifecycle of a virtio device is to be moved to a virtio transport, which will then query the @@ -95,12 +102,7 @@ pub trait VirtioDevice: Send { } /// Activates this device for real usage. - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_evt: Arc, - queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult; + fn activate(&mut self, context: ActivationContext) -> ActivateResult; /// Optionally deactivates this device and returns ownership of the guest memory map, interrupt /// event, and queue events. diff --git a/virtio-devices/src/iommu.rs b/virtio-devices/src/iommu.rs index f4812b04fb..87f8121696 100644 --- a/virtio-devices/src/iommu.rs +++ b/virtio-devices/src/iommu.rs @@ -1075,12 +1075,13 @@ impl VirtioDevice for Iommu { self.update_bypass(); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; let (kill_evt, pause_evt) = self.common.dup_eventfds(); diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs index d2d428299d..7084879a06 100644 --- a/virtio-devices/src/lib.rs +++ b/virtio-devices/src/lib.rs @@ -42,8 +42,8 @@ pub use self::balloon::Balloon; pub use self::block::{Block, BlockState}; pub use self::console::{Console, ConsoleResizer, Endpoint}; pub use self::device::{ - DmaRemapping, PostMigrationAnnouncer, VirtioCommon, VirtioDevice, VirtioInterrupt, - VirtioInterruptType, VirtioSharedMemoryList, + ActivationContext, DmaRemapping, PostMigrationAnnouncer, VirtioCommon, VirtioDevice, + VirtioInterrupt, VirtioInterruptType, VirtioSharedMemoryList, }; pub use self::epoll_helper::{ EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler, @@ -66,6 +66,7 @@ const DEVICE_ACKNOWLEDGE: u32 = 0x01; const DEVICE_DRIVER: u32 = 0x02; const DEVICE_DRIVER_OK: u32 = 0x04; const DEVICE_FEATURES_OK: u32 = 0x08; +const DEVICE_NEEDS_RESET: u32 = 0x40; const DEVICE_FAILED: u32 = 0x80; const VIRTIO_F_RING_INDIRECT_DESC: u32 = 28; diff --git a/virtio-devices/src/mem.rs b/virtio-devices/src/mem.rs index 936fdbe42a..aed8ed48d2 100644 --- a/virtio-devices/src/mem.rs +++ b/virtio-devices/src/mem.rs @@ -950,12 +950,13 @@ impl VirtioDevice for Mem { self.read_config_from_slice(self.config.lock().unwrap().as_slice(), offset, data); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; let (kill_evt, pause_evt) = self.common.dup_eventfds(); diff --git a/virtio-devices/src/net.rs b/virtio-devices/src/net.rs index 97e6349cec..9d5c2f7799 100644 --- a/virtio-devices/src/net.rs +++ b/virtio-devices/src/net.rs @@ -10,7 +10,7 @@ use std::net::IpAddr; use std::num::Wrapping; use std::ops::Deref; use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; use std::sync::{Arc, Barrier}; use std::{result, thread}; @@ -176,6 +176,7 @@ struct NetEpollHandler { queue_index_base: u16, queue_pair: (Queue, Queue), queue_evt_pair: (EventFd, EventFd), + device_status: Arc, } impl NetEpollHandler { @@ -189,6 +190,9 @@ impl NetEpollHandler { } fn handle_rx_event(&mut self) -> result::Result<(), DeviceError> { + if self.needs_reset() { + return Ok(()); + } let queue_evt = &self.queue_evt_pair.0; if let Err(e) = queue_evt.read() { error!("Failed to get rx queue event: {e:?}"); @@ -217,12 +221,43 @@ impl NetEpollHandler { Ok(()) } + fn handle_queue_iterator_error(&mut self, err: &virtio_queue::Error) { + // The guest submitted a corrupted VirtQ request, and the error + // was logged during queue processing. We cannot just ignore the + // error, as the guest could continue spamming the VMM with bad + // requests, triggering excessive error logging. So we mark + // the device "NEEDS_RESET", effectively stopping all request + // processing (see self.needs_reset() usage) until the guest + // resets and reactivates the device. + + warn!( + "Corrupted request detected (virtqueue error: {err:?}). \ +Setting device status to 'NEEDS_RESET' and stopping processing queues until reset." + ); + + self.device_status + .fetch_or(crate::DEVICE_NEEDS_RESET as u8, Ordering::SeqCst); + + // Let the guest know that the device status has changed. + if let Err(e) = self.interrupt_cb.trigger(VirtioInterruptType::Config) { + error!("Failed to signal config interrupt: {e:?}"); + } + } + fn process_tx(&mut self) -> result::Result<(), DeviceError> { - if self + if self.needs_reset() { + return Ok(()); + } + let res = self .net - .process_tx(&self.mem.memory(), &mut self.queue_pair.1) - .map_err(DeviceError::NetQueuePair)? - { + .process_tx(&self.mem.memory(), &mut self.queue_pair.1); + + if let Err(net_util::NetQueuePairError::QueueIteratorFailed(err)) = res { + self.handle_queue_iterator_error(&err); + return Ok(()); + } + + if res.map_err(DeviceError::NetQueuePair)? { self.signal_used_queue(self.queue_index_base + 1)?; debug!("Signalling TX queue"); } else { @@ -246,11 +281,19 @@ impl NetEpollHandler { } fn handle_rx_tap_event(&mut self) -> result::Result<(), DeviceError> { - if self + if self.needs_reset() { + return Ok(()); + } + let res = self .net - .process_rx(&self.mem.memory(), &mut self.queue_pair.0) - .map_err(DeviceError::NetQueuePair)? - { + .process_rx(&self.mem.memory(), &mut self.queue_pair.0); + + if let Err(net_util::NetQueuePairError::QueueIteratorFailed(err)) = res { + self.handle_queue_iterator_error(&err); + return Ok(()); + } + + if res.map_err(DeviceError::NetQueuePair)? { self.signal_used_queue(self.queue_index_base)?; trace!("Signalling RX queue"); } else { @@ -300,6 +343,10 @@ impl NetEpollHandler { Ok(()) } + + fn needs_reset(&self) -> bool { + (self.device_status.load(Ordering::Acquire) & crate::DEVICE_NEEDS_RESET as u8) != 0 + } } impl EpollHelperHandler for NetEpollHandler { @@ -411,6 +458,7 @@ pub struct Net { seccomp_action: SeccompAction, rate_limiter_config: Option, exit_evt: EventFd, + device_status: Arc, } #[derive(Serialize, Deserialize)] @@ -577,6 +625,7 @@ impl Net { seccomp_action, rate_limiter_config, exit_evt, + device_status: Arc::new(AtomicU8::new(0)), }) } @@ -827,12 +876,14 @@ impl VirtioDevice for Net { self.read_config_from_slice(config.as_slice(), offset, data); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + device_status, + } = context; + self.device_status = device_status; self.common.activate(&queues, interrupt_cb.clone())?; let num_queues = queues.len(); @@ -946,6 +997,7 @@ impl VirtioDevice for Net { interrupt_cb: interrupt_cb.clone(), kill_evt, pause_evt, + device_status: self.device_status.clone(), }; let paused = self.common.paused.clone(); @@ -1170,6 +1222,7 @@ mod unit_tests { seccomp_action: SeccompAction::Allow, rate_limiter_config: None, exit_evt: EventFd::new(libc::EFD_NONBLOCK).unwrap(), + device_status: Arc::new(Default::default()), } } diff --git a/virtio-devices/src/pmem.rs b/virtio-devices/src/pmem.rs index 549b62fd96..10cce76cb3 100644 --- a/virtio-devices/src/pmem.rs +++ b/virtio-devices/src/pmem.rs @@ -377,12 +377,13 @@ impl VirtioDevice for Pmem { self.read_config_from_slice(self.config.as_slice(), offset, data); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; let (kill_evt, pause_evt) = self.common.dup_eventfds(); if let Some(disk) = self.disk.as_ref() { diff --git a/virtio-devices/src/rng.rs b/virtio-devices/src/rng.rs index 2f980d4d8b..a3412a9e07 100644 --- a/virtio-devices/src/rng.rs +++ b/virtio-devices/src/rng.rs @@ -244,12 +244,13 @@ impl VirtioDevice for Rng { self.common.ack_features(value); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; let (kill_evt, pause_evt) = self.common.dup_eventfds(); diff --git a/virtio-devices/src/transport/pci_common_config.rs b/virtio-devices/src/transport/pci_common_config.rs index 07f5b4fc0f..ec4542948f 100644 --- a/virtio-devices/src/transport/pci_common_config.rs +++ b/virtio-devices/src/transport/pci_common_config.rs @@ -6,7 +6,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::sync::atomic::{AtomicU16, Ordering}; +use std::sync::atomic::{AtomicU8, AtomicU16, Ordering}; use std::sync::{Arc, Mutex}; use byteorder::{ByteOrder, LittleEndian}; @@ -125,7 +125,7 @@ pub fn get_vring_size(t: VringType, queue_size: u16) -> u64 { /// le64 queue_used; // 0x30 // read-write pub struct VirtioPciCommonConfig { pub access_platform: Option>, - pub driver_status: u8, + pub driver_status: Arc, pub config_generation: u8, pub device_feature_select: u32, pub driver_feature_select: u32, @@ -141,7 +141,7 @@ impl VirtioPciCommonConfig { ) -> Self { VirtioPciCommonConfig { access_platform, - driver_status: state.driver_status, + driver_status: Arc::new(AtomicU8::new(state.driver_status)), config_generation: state.config_generation, device_feature_select: state.device_feature_select, driver_feature_select: state.driver_feature_select, @@ -153,7 +153,7 @@ impl VirtioPciCommonConfig { fn state(&self) -> VirtioPciCommonConfigState { VirtioPciCommonConfigState { - driver_status: self.driver_status, + driver_status: self.driver_status.load(Ordering::Acquire), config_generation: self.config_generation, device_feature_select: self.device_feature_select, driver_feature_select: self.driver_feature_select, @@ -223,7 +223,7 @@ impl VirtioPciCommonConfig { debug!("read_common_config_byte: offset 0x{offset:x}"); // The driver is only allowed to do aligned, properly sized access. match offset { - 0x14 => self.driver_status, + 0x14 => self.driver_status.load(Ordering::Acquire), 0x15 => self.config_generation, _ => { warn!("invalid virtio config byte read: 0x{offset:x}"); @@ -235,7 +235,7 @@ impl VirtioPciCommonConfig { fn write_common_config_byte(&mut self, offset: u64, value: u8) { debug!("write_common_config_byte: offset 0x{offset:x}"); match offset { - 0x14 => self.driver_status = value, + 0x14 => self.driver_status.store(value, Ordering::Release), _ => { warn!("invalid virtio config byte write: 0x{offset:x}"); } @@ -404,11 +404,8 @@ impl Snapshottable for VirtioPciCommonConfig { #[cfg(test)] mod unit_tests { - use vm_memory::GuestMemoryAtomic; - use vmm_sys_util::eventfd::EventFd; - use super::*; - use crate::{ActivateResult, GuestMemoryMmap, VirtioInterrupt}; + use crate::{ActivateResult, ActivationContext}; struct DummyDevice(u32); const QUEUE_SIZE: u16 = 256; @@ -421,12 +418,7 @@ mod unit_tests { fn queue_max_sizes(&self) -> &[u16] { QUEUE_SIZES } - fn activate( - &mut self, - _mem: GuestMemoryAtomic, - _interrupt_evt: Arc, - _queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, _context: ActivationContext) -> ActivateResult { Ok(()) } @@ -445,7 +437,7 @@ mod unit_tests { fn write_base_regs() { let mut regs = VirtioPciCommonConfig { access_platform: None, - driver_status: 0xaa, + driver_status: Arc::new(AtomicU8::new(0xaa)), config_generation: 0x55, device_feature_select: 0x0, driver_feature_select: 0x0, diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs index 408611e29a..c4df68c999 100644 --- a/virtio-devices/src/transport/pci_device.rs +++ b/virtio-devices/src/transport/pci_device.rs @@ -10,7 +10,7 @@ use std::any::Any; use std::cmp; use std::io::Write; use std::ops::Deref; -use std::sync::atomic::{AtomicBool, AtomicU16, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU16, AtomicUsize, Ordering}; use std::sync::{Arc, Barrier, Mutex}; use anyhow::anyhow; @@ -288,15 +288,18 @@ pub struct VirtioPciDeviceActivator { queues: Option>, barrier: Option>, id: String, + status: Arc, } impl VirtioPciDeviceActivator { - pub fn activate(&mut self) -> ActivateResult { - self.device.lock().unwrap().activate( - self.memory.take().unwrap(), - self.interrupt.take().unwrap(), - self.queues.take().unwrap(), - )?; + pub fn activate(mut self) -> ActivateResult { + let mut locked_device = self.device.lock().unwrap(); + locked_device.activate(crate::device::ActivationContext { + mem: self.memory.take().unwrap(), + interrupt_cb: self.interrupt.take().unwrap(), + queues: self.queues.take().unwrap(), + device_status: self.status, + })?; self.device_activated.store(true, Ordering::SeqCst); if let Some(barrier) = self.barrier.take() { @@ -641,13 +644,13 @@ impl VirtioPciDevice { fn is_driver_ready(&self) -> bool { let ready_bits = (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK) as u8; - self.common_config.driver_status == ready_bits - && self.common_config.driver_status & DEVICE_FAILED as u8 == 0 + let driver_status = self.common_config.driver_status.load(Ordering::SeqCst); + driver_status == ready_bits && (driver_status & DEVICE_FAILED as u8) == 0 } /// Determines if the driver has requested the device (re)init / reset itself fn is_driver_init(&self) -> bool { - self.common_config.driver_status == DEVICE_INIT as u8 + self.common_config.driver_status.load(Ordering::SeqCst) == DEVICE_INIT as u8 } pub fn config_bar_addr(&self) -> u64 { @@ -801,6 +804,7 @@ impl VirtioPciDevice { device_activated: self.device_activated.clone(), barrier, id: self.id.clone(), + status: self.common_config.driver_status.clone(), } } @@ -1219,7 +1223,9 @@ impl PciDevice for VirtioPciDevice { self.common_config.queue_select = 0; } else { error!("Attempt to reset device when not implemented in underlying device"); - self.common_config.driver_status = crate::DEVICE_FAILED as u8; + self.common_config + .driver_status + .store(crate::DEVICE_FAILED as u8, Ordering::SeqCst); } } diff --git a/virtio-devices/src/vdpa.rs b/virtio-devices/src/vdpa.rs index 725f215c77..1996d94470 100644 --- a/virtio-devices/src/vdpa.rs +++ b/virtio-devices/src/vdpa.rs @@ -428,12 +428,13 @@ impl VirtioDevice for Vdpa { } } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - virtio_interrupt: Arc, - queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb: virtio_interrupt, + queues, + .. + } = context; self.activate_vdpa(&mem.memory(), virtio_interrupt.as_ref(), &queues) .map_err(ActivateError::ActivateVdpa)?; diff --git a/virtio-devices/src/vhost_user/blk.rs b/virtio-devices/src/vhost_user/blk.rs index d26350c91a..ea52872aa1 100644 --- a/virtio-devices/src/vhost_user/blk.rs +++ b/virtio-devices/src/vhost_user/blk.rs @@ -19,7 +19,6 @@ use virtio_bindings::virtio_blk::{ VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_WRITE_ZEROES, }; -use virtio_queue::Queue; use vm_memory::{ByteValued, GuestMemoryAtomic}; use vm_migration::protocol::MemoryRangeTable; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; @@ -279,12 +278,13 @@ impl VirtioDevice for Blk { } } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; self.guest_memory = Some(mem.clone()); diff --git a/virtio-devices/src/vhost_user/fs.rs b/virtio-devices/src/vhost_user/fs.rs index d0005af90f..ca8c72539d 100644 --- a/virtio-devices/src/vhost_user/fs.rs +++ b/virtio-devices/src/vhost_user/fs.rs @@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize}; use serde_with::{Bytes, serde_as}; use vhost::vhost_user::message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}; use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; -use virtio_queue::Queue; use vm_device::UserspaceMapping; use vm_memory::{ByteValued, GuestMemoryAtomic}; use vm_migration::protocol::MemoryRangeTable; @@ -261,12 +260,13 @@ impl VirtioDevice for Fs { self.read_config_from_slice(self.config.as_slice(), offset, data); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; self.guest_memory = Some(mem.clone()); diff --git a/virtio-devices/src/vhost_user/net.rs b/virtio-devices/src/vhost_user/net.rs index 667c3747b2..4b77b4f91c 100644 --- a/virtio-devices/src/vhost_user/net.rs +++ b/virtio-devices/src/vhost_user/net.rs @@ -20,7 +20,7 @@ use virtio_bindings::virtio_net::{ VIRTIO_NET_F_STATUS, VIRTIO_NET_S_ANNOUNCE, VIRTIO_NET_S_LINK_UP, }; use virtio_bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; -use virtio_queue::{Queue, QueueT}; +use virtio_queue::QueueT; use vm_memory::{ByteValued, GuestMemoryAtomic}; use vm_migration::protocol::MemoryRangeTable; use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; @@ -372,12 +372,13 @@ impl VirtioDevice for Net { self.read_config_from_slice(config.as_slice(), offset, data); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; self.guest_memory = Some(mem.clone()); diff --git a/virtio-devices/src/vsock/device.rs b/virtio-devices/src/vsock/device.rs index 27a0af1ff2..bfb3e0c142 100644 --- a/virtio-devices/src/vsock/device.rs +++ b/virtio-devices/src/vsock/device.rs @@ -435,12 +435,13 @@ where } } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; let (kill_evt, pause_evt) = self.common.dup_eventfds(); @@ -593,9 +594,12 @@ mod unit_tests { let memory = GuestMemoryAtomic::new(ctx.mem.clone()); // Test a bad activation. - let bad_activate = - ctx.device - .activate(memory.clone(), Arc::new(NoopVirtioInterrupt {}), Vec::new()); + let bad_activate = ctx.device.activate(crate::device::ActivationContext { + mem: memory.clone(), + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: Vec::new(), + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }); match bad_activate { Err(ActivateError::BadActivate) => (), other => panic!("{other:?}"), @@ -603,10 +607,10 @@ mod unit_tests { // Test a correct activation. ctx.device - .activate( - memory, - Arc::new(NoopVirtioInterrupt {}), - vec![ + .activate(crate::device::ActivationContext { + mem: memory, + interrupt_cb: Arc::new(NoopVirtioInterrupt {}), + queues: vec![ ( 0, Queue::new(256).unwrap(), @@ -623,7 +627,8 @@ mod unit_tests { EventFd::new(EFD_NONBLOCK).unwrap(), ), ], - ) + device_status: Arc::new(std::sync::atomic::AtomicU8::new(0)), + }) .unwrap(); } diff --git a/virtio-devices/src/watchdog.rs b/virtio-devices/src/watchdog.rs index 6b9f7cc0ac..742a2e0241 100644 --- a/virtio-devices/src/watchdog.rs +++ b/virtio-devices/src/watchdog.rs @@ -326,12 +326,13 @@ impl VirtioDevice for Watchdog { self.common.ack_features(value); } - fn activate( - &mut self, - mem: GuestMemoryAtomic, - interrupt_cb: Arc, - mut queues: Vec<(usize, Queue, EventFd)>, - ) -> ActivateResult { + fn activate(&mut self, context: crate::device::ActivationContext) -> ActivateResult { + let crate::device::ActivationContext { + mem, + interrupt_cb, + mut queues, + .. + } = context; self.common.activate(&queues, interrupt_cb.clone())?; let (kill_evt, pause_evt) = self.common.dup_eventfds(); diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 1e6293c350..ce224272b8 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -1524,6 +1524,11 @@ impl DeviceManager { Ok(()) } + /// Drop restore-only state once all devices have consumed it. + pub(crate) fn clear_restore_snapshot(&mut self) { + self.snapshot = None; + } + #[cfg(feature = "fw_cfg")] pub fn create_fw_cfg_device(&mut self) -> Result<(), DeviceManagerError> { let fw_cfg = Arc::new(Mutex::new(devices::legacy::FwCfg::new( @@ -4566,7 +4571,7 @@ impl DeviceManager { } pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { - for mut activator in self.pending_activations.lock().unwrap().drain(..) { + for activator in self.pending_activations.lock().unwrap().drain(..) { activator .activate() .map_err(DeviceManagerError::VirtioActivate)?; diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 96928b4c1f..9daf5ff8dd 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -639,6 +639,9 @@ impl Vm { snapshot, )?; + // Remove any snapshot artifacts after the hypervisor-specific init. + device_manager.lock().unwrap().clear_restore_snapshot(); + // Load kernel and initramfs files #[cfg(feature = "tdx")] let kernel = config