diff --git a/crates/osutils/src/uname.rs b/crates/osutils/src/uname.rs
index c73576eda..f6f48ccc4 100644
--- a/crates/osutils/src/uname.rs
+++ b/crates/osutils/src/uname.rs
@@ -11,11 +11,91 @@ pub fn kernel_release() -> Result {
         .context("Failed to run uname -r")
 }
 
+/// Parsed kernel version with major and minor components.
+///
+/// Implements `Ord` so callers can compare against feature thresholds
+/// (e.g., `kv >= KernelVersion { major: 6, minor: 7 }`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct KernelVersion {
+    pub major: u32,
+    pub minor: u32,
+}
+
+impl KernelVersion {
+    /// Parse a kernel version from a `uname -r` string.
+    ///
+    /// Extracts the leading `major.minor` from strings like:
+    /// - `6.6.78.2-1.cm2`
+    /// - `6.7.0-1.cm2`
+    /// - `7.0.0`
+    ///
+    /// Returns `None` if the string cannot be parsed.
+    pub fn parse(release: &str) -> Option<Self> {
+        // Strip everything after the first '-' (e.g. "-1.cm2"), then split on '.'.
+        let numeric_part = release.split('-').next()?;
+        let mut parts = numeric_part.split('.');
+        let major = parts.next()?.parse::<u32>().ok()?;
+        let minor = parts.next()?.parse::<u32>().ok()?;
+        Some(KernelVersion { major, minor })
+    }
+
+    /// Returns the kernel version of the running system.
+    ///
+    /// Returns `Err` if the `uname` command fails to execute, or `Ok(None)`
+    /// if the output cannot be parsed into a major.minor version.
+    pub fn running() -> Result<Option<Self>, Error> {
+        let release = kernel_release()?;
+        Ok(Self::parse(&release))
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use crate::uname;
+    use super::*;
+
     #[test]
     fn test_kernel_release() {
-        uname::kernel_release().unwrap();
+        kernel_release().unwrap();
+    }
+
+    #[test]
+    fn test_parse_azl_kernel() {
+        let v = KernelVersion::parse("6.6.78.2-1.cm2").unwrap();
+        assert_eq!(v, KernelVersion { major: 6, minor: 6 });
+        assert!(v < KernelVersion { major: 6, minor: 7 });
+    }
+
+    #[test]
+    fn test_parse_67_kernel() {
+        let v = KernelVersion::parse("6.7.0-1.cm2").unwrap();
+        assert_eq!(v, KernelVersion { major: 6, minor: 7 });
+        assert!(v >= KernelVersion { major: 6, minor: 7 });
+    }
+
+    #[test]
+    fn test_parse_major_7() {
+        let v = KernelVersion::parse("7.0.0").unwrap();
+        assert_eq!(v, KernelVersion { major: 7, minor: 0 });
+        assert!(v >= KernelVersion { major: 6, minor: 7 });
+    }
+
+    #[test]
+    fn test_parse_simple() {
+        let v = KernelVersion::parse("5.15").unwrap();
+        assert_eq!(
+            v,
+            KernelVersion {
+                major: 5,
+                minor: 15
+            }
+        );
+        assert!(v < KernelVersion { major: 6, minor: 7 });
+    }
+
+    #[test]
+    fn test_parse_garbage() {
+        assert!(KernelVersion::parse("not-a-version").is_none());
+        assert!(KernelVersion::parse("").is_none());
+        assert!(KernelVersion::parse("6").is_none());
     }
 }
diff --git a/crates/trident/src/engine/newroot.rs b/crates/trident/src/engine/newroot.rs
index d0b43b825..ac39b2baa 100644
--- a/crates/trident/src/engine/newroot.rs
+++ b/crates/trident/src/engine/newroot.rs
@@ -18,7 +18,8 @@ use sysdefs::{
 use trident_api::{
     config::{FileSystem, HostConfiguration},
     constants::{
-        NONE_MOUNT_POINT, ROOT_MOUNT_POINT_PATH, UPDATE_ROOT_FALLBACK_PATH, UPDATE_ROOT_PATH,
+        internal_params, NONE_MOUNT_POINT, ROOT_MOUNT_POINT_PATH, UPDATE_ROOT_FALLBACK_PATH,
+        UPDATE_ROOT_PATH,
     },
     error::{InternalError, ReportError, ServicingError, TridentError, TridentResultExt},
     status::AbVolumeSelection,
@@ -174,9 +175,14 @@ impl NewrootMount {
             }
         }
 
-        // Check for ACL BTRFS UUID collision before mounting.
-        let acl_collision_uuid =
-            detect_acl_btrfs_uuid_collision(update_volume, staging_usr_roothash);
+        // Check for ACL BTRFS UUID collision and determine resolution strategy.
+        let acl_collision_resolution = resolve_acl_btrfs_uuid_collision(
+            update_volume,
+            staging_usr_roothash,
+            host_config
+                .internal_params
+                .get_flag(internal_params::ENABLE_AZL4),
+        );
 
         // Mount all block devices in the newroot
         mount_points_map(host_config)
@@ -217,32 +223,60 @@ impl NewrootMount {
                 let fs_type = block_device.fstype.and_then(|fs_type| KernelFilesystemType::from(fs_type.as_str()).try_as_real());
 
                 // ACL-specific: if the staging device has a BTRFS filesystem UUID that
-                // collides with the active USR partition, bind-mount from the host's
-                // /usr instead. The verity-protected filesystem is read-only and the
-                // content is identical when UUIDs match, so the bind mount provides
-                // equivalent content for chroot provisioning.
-                if let Some(ref collision_uuid) = acl_collision_uuid {
+                // collides with the active USR partition, resolve based on strategy:
+                // - enableAzl4 + kernel >=6.7: mount with -o temp_fsuid (staging device directly)
+                // - Otherwise: bind-mount from active /usr (verity-verified identical)
+                if let Some(ref resolution) = acl_collision_resolution {
+                    let collision_uuid = resolution.collision_uuid();
                     if fs_type == Some(RealFilesystemType::Btrfs)
                         && block_device.fsuuid.as_ref() == Some(collision_uuid)
                     {
-                        let active_usr = Path::new("/usr");
-                        warn!(
-                            "Block device '{}' has BTRFS filesystem UUID '{}' which collides \
-                             with the active ACL USR partition. Bind-mounting '{}' to '{}' instead.",
-                            target_id,
-                            collision_uuid,
-                            active_usr.display(),
-                            target_path.display()
-                        );
-                        do_bind_mount(active_usr, &target_path, MountFlags::RDONLY)
-                            .with_context(|| {
-                                format!(
-                                    "Failed to bind mount '{}' to '{}' \
-                                     for ACL BTRFS UUID collision workaround",
-                                    active_usr.display(),
-                                    target_path.display(),
+                        match resolution {
+                            AclBtrfsCollisionResolution::TempFsuid { .. } => {
+                                let mut options = mp.options.to_string_vec();
+                                options.push("temp_fsuid".to_string());
+                                warn!(
+                                    "Block device '{}' has BTRFS filesystem UUID '{}' which \
+                                     collides with the active ACL USR partition. Mounting with \
+                                     temp_fsuid option (kernel >=6.7).",
+                                    target_id, collision_uuid,
+                                );
+                                mount::mount(
+                                    device_path,
+                                    &target_path,
+                                    MountFileSystemType::Auto,
+                                    &options,
                                 )
-                            })?;
+                                .context(format!(
+                                    "Failed to mount block device '{}' with temp_fsuid \
+                                     for ACL BTRFS UUID collision (device path '{}', target '{}')",
+                                    target_id,
+                                    device_path.display(),
+                                    target_path.display()
+                                ))?;
+                            }
+                            AclBtrfsCollisionResolution::BindMountActiveUsr { .. } => {
+                                let active_usr = Path::new("/usr");
+                                warn!(
+                                    "Block device '{}' has BTRFS filesystem UUID '{}' which \
+                                     collides with the active ACL USR partition. Bind-mounting \
+                                     '{}' to '{}' instead (temp_fsuid not selected).",
+                                    target_id,
+                                    collision_uuid,
+                                    active_usr.display(),
+                                    target_path.display()
+                                );
+                                do_bind_mount(active_usr, &target_path, MountFlags::RDONLY)
+                                    .with_context(|| {
+                                        format!(
+                                            "Failed to bind mount '{}' to '{}' \
+                                             for ACL BTRFS UUID collision workaround",
+                                            active_usr.display(),
+                                            target_path.display(),
+                                        )
+                                    })?;
+                            }
+                        }
                         self.add_mount(target_path.clone());
                         return Ok(());
                     }
@@ -388,26 +422,107 @@ fn should_be_bind_mounted(fs_type: Option) -> bool {
 // ACL constants and helpers are in the shared acl module.
 use super::acl::{self, ACL_USR_A_PARTUUID, ACL_USR_B_PARTUUID};
 
-/// Detects a BTRFS filesystem UUID collision on ACL's USR A/B partitions.
+/// Minimum kernel version required for the BTRFS `temp_fsuid` mount option
+/// (introduced in Linux 6.7). Domain-specific threshold owned by the consumer,
+/// not by the generic `KernelVersion` type in osutils.
+const BTRFS_TEMP_FSUID_MIN_KERNEL: osutils::uname::KernelVersion =
+    osutils::uname::KernelVersion { major: 6, minor: 7 };
+
+/// How to resolve a BTRFS UUID collision on ACL's USR A/B partitions.
+#[derive(Debug)]
+enum AclBtrfsCollisionResolution {
+    /// Kernel ≥6.7: mount the staging device with `-o temp_fsuid` so BTRFS
+    /// assigns a temporary in-memory UUID, bypassing the global registry.
+    TempFsuid { collision_uuid: OsUuid },
+    /// Fallback (kernel <6.7, kernel unknown, or `enableAzl4` unset): bind-mount
+    /// the active `/usr`; safe only if the verity root hashes show identical content.
+    BindMountActiveUsr { collision_uuid: OsUuid },
+}
+
+impl AclBtrfsCollisionResolution {
+    fn collision_uuid(&self) -> &OsUuid {
+        match self {
+            Self::TempFsuid { collision_uuid } | Self::BindMountActiveUsr { collision_uuid } => {
+                collision_uuid
+            }
+        }
+    }
+}
+
+/// Detects a BTRFS filesystem UUID collision on ACL's USR A/B partitions and
+/// determines how to resolve it based on the running kernel version and
+/// the `enableAzl4` internal parameter.
 ///
 /// BTRFS maintains a kernel-global UUID registry and refuses to mount a filesystem
 /// whose UUID is already registered by another mounted device. During A/B updates
 /// where the COSI image shares filesystem UUIDs with the active OS, the staging
-/// verity device cannot be mounted.
+/// verity device cannot be mounted directly.
 ///
-/// This function checks whether the active and update USR partitions (identified by
-/// their well-known ACL PARTUUIDs) have the same BTRFS filesystem UUID. If so, it
-/// returns the colliding UUID so the caller can substitute a bind mount from the
-/// active `/usr`.
+/// Resolution strategy:
+/// - `enable_azl4` + Kernel ≥6.7: use `mount -o temp_fsuid` (mounts the real staging device)
+/// - Otherwise: bind-mount from active `/usr` (requires verity hash match)
 ///
-/// Returns `None` if:
-/// - The system is not ACL (PARTUUIDs not found)
-/// - The partitions don't have BTRFS filesystems
-/// - The filesystem UUIDs are different (no collision)
-fn detect_acl_btrfs_uuid_collision(
+/// Returns `None` if no collision exists or if the bind-mount path is unsafe.
+fn resolve_acl_btrfs_uuid_collision(
     update_volume: AbVolumeSelection,
     staging_usr_roothash: Option<&str>,
-) -> Option<OsUuid> {
+    enable_azl4: bool,
+) -> Option<AclBtrfsCollisionResolution> {
+    // 1. Detect whether a UUID collision exists.
+    let collision_uuid = detect_acl_btrfs_uuid_collision(update_volume)?;
+
+    // 2. Determine resolution strategy based on kernel version.
+    //    The temp_fsuid path requires the enableAzl4 internal param to be set.
+    //    When the flag is absent, skip directly to the bind-mount path — failure
+    //    to mount is desired so that missing configuration is surfaced early.
+    if enable_azl4 {
+        // Handle each outcome exactly once: a parsed version, unparseable
+        // uname output (`Ok(None)`), or a uname execution failure (`Err`).
+        // Only a parsed version ≥6.7 selects temp_fsuid; the rest fall through.
+        match osutils::uname::KernelVersion::running() {
+            Ok(Some(kv)) => {
+                let supports_temp_fsuid = kv >= BTRFS_TEMP_FSUID_MIN_KERNEL;
+                debug!(
+                    "Running kernel {}.{}, BTRFS temp_fsuid supported: {}",
+                    kv.major, kv.minor, supports_temp_fsuid
+                );
+                if supports_temp_fsuid {
+                    // Kernel ≥6.7: mount the staging device directly with temp_fsuid.
+                    // Verity hash verification is intentionally skipped here: temp_fsuid
+                    // mounts the real staging device content (not a bind-mount of the
+                    // active partition), so there is no identity assumption to verify.
+                    return Some(AclBtrfsCollisionResolution::TempFsuid { collision_uuid });
+                }
+            }
+            // uname succeeded but output could not be parsed into major.minor.
+            Ok(None) => warn!(
+                "Could not parse kernel version from uname output; \
+                 falling back to bind-mount strategy for ACL BTRFS UUID collision"
+            ),
+            // DR-003: distinguish uname execution failure from parse failure.
+            Err(e) => warn!(
+                "Failed to execute uname: {e}; \
+                 falling back to bind-mount strategy for ACL BTRFS UUID collision"
+            ),
+        }
+    }
+
+    // 3. Kernel <6.7, unknown kernel, or enableAzl4 not set: bind-mount from
+    //    active /usr. This requires verity hash verification to prove content
+    //    is identical.
+    if !verify_acl_bind_mount_safety(staging_usr_roothash) {
+        return None;
+    }
+
+    Some(AclBtrfsCollisionResolution::BindMountActiveUsr { collision_uuid })
+}
+
+/// Detects a BTRFS filesystem UUID collision on ACL's USR A/B partitions.
+///
+/// Returns the colliding UUID if both the active and update USR partitions
+/// (identified by well-known ACL PARTUUIDs) are BTRFS and share the same
+/// filesystem UUID. Returns `None` otherwise.
+fn detect_acl_btrfs_uuid_collision(update_volume: AbVolumeSelection) -> Option<OsUuid> {
     let (active_partuuid, update_partuuid) = match update_volume {
         AbVolumeSelection::VolumeA => (ACL_USR_B_PARTUUID, ACL_USR_A_PARTUUID),
         AbVolumeSelection::VolumeB => (ACL_USR_A_PARTUUID, ACL_USR_B_PARTUUID),
@@ -438,18 +553,17 @@ fn detect_acl_btrfs_uuid_collision(
          share filesystem UUID '{active_uuid}'"
     );
 
-    // When a staging root hash is available, verify that the active USR
-    // partition has the same verity root hash. This provides a cryptographic
-    // guarantee that the filesystems are byte-identical, not just a UUID match.
+    Some(active_uuid)
+}
+
+/// Reports whether bind-mounting from the active `/usr` is safe: true when the verity
+/// root hashes match or when no staging hash was supplied, false otherwise.
+fn verify_acl_bind_mount_safety(staging_usr_roothash: Option<&str>) -> bool {
     let Some(staging_hash) = staging_usr_roothash else {
-        // No staging hash available — cannot verify content identity.
-        // Refusing the bind-mount is the safe choice: proceeding without
-        // verification could mount different content at /usr.
-        warn!(
-            "No staging USR verity root hash provided. \
-             Refusing bind-mount — cannot verify content identity."
-        );
-        return None;
+        // No staging hash: nothing to compare against. This path relies on the
+        // upstream validation (validate_acl_duplicate_uuid) having checked hashes.
+        warn!("No staging USR verity root hash provided; allowing bind-mount unverified.");
+        return true;
     };
 
     match acl::read_active_usr_roothash() {
@@ -460,6 +574,7 @@ fn detect_acl_btrfs_uuid_collision(
                      partitions have matching root hash ({}...)",
                     acl::hash_preview(staging_hash)
                 );
+                true
             } else {
                 warn!(
                     "Verity root hash mismatch: active USR has '{}...', staging has '{}...'. \
@@ -467,7 +582,7 @@ fn detect_acl_btrfs_uuid_collision(
                     acl::hash_preview(&active_hash),
                     acl::hash_preview(staging_hash)
                 );
-                return None;
+                false
             }
         }
         None => {
@@ -475,11 +590,9 @@ fn detect_acl_btrfs_uuid_collision(
                 "Cannot read active USR verity root hash from /proc/cmdline. \
                  Refusing bind-mount despite UUID collision."
             );
-            return None;
+            false
         }
     }
-
-    Some(active_uuid)
 }
 
 /// Returns an ordered map of mount points to their corresponding FileSystem objects.
diff --git a/crates/trident_api/src/constants.rs b/crates/trident_api/src/constants.rs
index 8f62f2af7..02de25144 100644
--- a/crates/trident_api/src/constants.rs
+++ b/crates/trident_api/src/constants.rs
@@ -215,6 +215,12 @@ pub mod internal_params {
     /// Run dracut in debug mode to capture more output.
     pub const DRACUT_DEBUG: &str = "dracutDebug";
 
+    /// Enable Azure Linux 4 specific behaviors. Gates features that depend on
+    /// AZL4 kernel capabilities (e.g., BTRFS temp_fsuid mount option on
+    /// kernel ≥6.7). Must be explicitly set; absence means AZL4 codepaths
+    /// are not activated.
+    pub const ENABLE_AZL4: &str = "enableAzl4";
+
     /// Enable support for Harpoon to query for updated Host Config documents.
     pub const ENABLE_HARPOON_SUPPORT: &str = "harpoon";
 