Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 82 additions & 2 deletions crates/osutils/src/uname.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,91 @@ pub fn kernel_release() -> Result<String, Error> {
.context("Failed to run uname -r")
}

/// A Linux kernel version reduced to its `major.minor` pair.
///
/// The comparison traits are derived, so ordering follows the field
/// declaration order: `major` is compared first, then `minor`. This lets
/// callers test against feature thresholds directly
/// (e.g., `kv >= KernelVersion { major: 6, minor: 7 }`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct KernelVersion {
    /// Major version component (the `6` in `6.7`).
    pub major: u32,
    /// Minor version component (the `7` in `6.7`).
    pub minor: u32,
}

impl KernelVersion {
    /// Parse a kernel version from a `uname -r` string.
    ///
    /// Extracts the leading `major.minor` from strings like:
    /// - `6.6.78.2-1.cm2`
    /// - `6.7.0-1.cm2`
    /// - `7.0.0`
    ///
    /// Surrounding whitespace (for example a trailing newline) is ignored.
    /// Components after `minor` are not inspected.
    ///
    /// Returns `None` if the string does not start with two dot-separated
    /// components made up solely of ASCII digits that fit in a `u32`.
    pub fn parse(release: &str) -> Option<Self> {
        // Drop everything after the first '-' (e.g. "-1.cm2"), then split on '.'.
        let numeric_part = release.trim().split('-').next()?;
        let mut components = numeric_part.split('.');
        let major = Self::parse_component(components.next()?)?;
        let minor = Self::parse_component(components.next()?)?;
        Some(Self { major, minor })
    }

    /// Parses one version component, accepting ASCII digits only.
    ///
    /// `u32::from_str` also accepts a leading `+`, which is not valid in a
    /// kernel version, so the digits-only check is performed explicitly.
    fn parse_component(component: &str) -> Option<u32> {
        if component.is_empty() || !component.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        component.parse().ok()
    }

    /// Returns the kernel version of the running system.
    ///
    /// Returns `Err` if `kernel_release()` fails, or `Ok(None)` if its output
    /// cannot be parsed into a `major.minor` version.
    pub fn running() -> Result<Option<Self>, Error> {
        let release = kernel_release()?;
        Ok(Self::parse(&release))
    }
}

// Unit tests for `kernel_release` and `KernelVersion::parse`.
#[cfg(test)]
mod tests {
use crate::uname;
use super::*;

// Smoke test: only checks that fetching the kernel release does not fail.
// NOTE(review): the path-qualified call and the glob-imported call look like
// the removed and added sides of the same diff line — confirm which one
// should remain.
#[test]
fn test_kernel_release() {
uname::kernel_release().unwrap();
kernel_release().unwrap();
}

// Four numeric components plus a '-' suffix: only major.minor are kept.
#[test]
fn test_parse_azl_kernel() {
let v = KernelVersion::parse("6.6.78.2-1.cm2").unwrap();
assert_eq!(v, KernelVersion { major: 6, minor: 6 });
assert!(v < KernelVersion { major: 6, minor: 7 });
}

// A version exactly at the 6.7 threshold compares as >= the threshold.
#[test]
fn test_parse_67_kernel() {
let v = KernelVersion::parse("6.7.0-1.cm2").unwrap();
assert_eq!(v, KernelVersion { major: 6, minor: 7 });
assert!(v >= KernelVersion { major: 6, minor: 7 });
}

// A higher major version outranks 6.7 even though its minor is 0.
#[test]
fn test_parse_major_7() {
let v = KernelVersion::parse("7.0.0").unwrap();
assert_eq!(v, KernelVersion { major: 7, minor: 0 });
assert!(v >= KernelVersion { major: 6, minor: 7 });
}

// A bare "major.minor" string with no patch level or suffix still parses.
#[test]
fn test_parse_simple() {
let v = KernelVersion::parse("5.15").unwrap();
assert_eq!(
v,
KernelVersion {
major: 5,
minor: 15
}
);
assert!(v < KernelVersion { major: 6, minor: 7 });
}

// Non-numeric text, the empty string, and a lone major component yield `None`.
#[test]
fn test_parse_garbage() {
assert!(KernelVersion::parse("not-a-version").is_none());
assert!(KernelVersion::parse("").is_none());
assert!(KernelVersion::parse("6").is_none());
}
}
219 changes: 166 additions & 53 deletions crates/trident/src/engine/newroot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ use sysdefs::{
use trident_api::{
config::{FileSystem, HostConfiguration},
constants::{
NONE_MOUNT_POINT, ROOT_MOUNT_POINT_PATH, UPDATE_ROOT_FALLBACK_PATH, UPDATE_ROOT_PATH,
internal_params, NONE_MOUNT_POINT, ROOT_MOUNT_POINT_PATH, UPDATE_ROOT_FALLBACK_PATH,
UPDATE_ROOT_PATH,
},
error::{InternalError, ReportError, ServicingError, TridentError, TridentResultExt},
status::AbVolumeSelection,
Expand Down Expand Up @@ -174,9 +175,14 @@ impl NewrootMount {
}
}

// Check for ACL BTRFS UUID collision before mounting.
let acl_collision_uuid =
detect_acl_btrfs_uuid_collision(update_volume, staging_usr_roothash);
// Check for ACL BTRFS UUID collision and determine resolution strategy.
let acl_collision_resolution = resolve_acl_btrfs_uuid_collision(
update_volume,
staging_usr_roothash,
host_config
.internal_params
.get_flag(internal_params::ENABLE_AZL4),
);

// Mount all block devices in the newroot
mount_points_map(host_config)
Expand Down Expand Up @@ -217,32 +223,60 @@ impl NewrootMount {
let fs_type = block_device.fstype.and_then(|fs_type| KernelFilesystemType::from(fs_type.as_str()).try_as_real());

// ACL-specific: if the staging device has a BTRFS filesystem UUID that
// collides with the active USR partition, bind-mount from the host's
// /usr instead. The verity-protected filesystem is read-only and the
// content is identical when UUIDs match, so the bind mount provides
// equivalent content for chroot provisioning.
if let Some(ref collision_uuid) = acl_collision_uuid {
// collides with the active USR partition, resolve based on strategy:
// - enableAzl4 + kernel >=6.7: mount with -o temp_fsuid (staging device directly)
// - Otherwise: bind-mount from active /usr (verity-verified identical)
if let Some(ref resolution) = acl_collision_resolution {
let collision_uuid = resolution.collision_uuid();
if fs_type == Some(RealFilesystemType::Btrfs)
&& block_device.fsuuid.as_ref() == Some(collision_uuid)
{
let active_usr = Path::new("/usr");
warn!(
"Block device '{}' has BTRFS filesystem UUID '{}' which collides \
with the active ACL USR partition. Bind-mounting '{}' to '{}' instead.",
target_id,
collision_uuid,
active_usr.display(),
target_path.display()
);
do_bind_mount(active_usr, &target_path, MountFlags::RDONLY)
.with_context(|| {
format!(
"Failed to bind mount '{}' to '{}' \
for ACL BTRFS UUID collision workaround",
active_usr.display(),
target_path.display(),
match resolution {
AclBtrfsCollisionResolution::TempFsuid { .. } => {
let mut options = mp.options.to_string_vec();
options.push("temp_fsuid".to_string());
warn!(
"Block device '{}' has BTRFS filesystem UUID '{}' which \
collides with the active ACL USR partition. Mounting with \
temp_fsuid option (kernel >=6.7).",
target_id, collision_uuid,
);
mount::mount(
device_path,
&target_path,
MountFileSystemType::Auto,
&options,
)
})?;
.context(format!(
"Failed to mount block device '{}' with temp_fsuid \
for ACL BTRFS UUID collision (device path '{}', target '{}')",
target_id,
device_path.display(),
target_path.display()
))?;
}
AclBtrfsCollisionResolution::BindMountActiveUsr { .. } => {
let active_usr = Path::new("/usr");
warn!(
"Block device '{}' has BTRFS filesystem UUID '{}' which \
collides with the active ACL USR partition. Bind-mounting \
'{}' to '{}' instead (kernel <6.7).",
target_id,
collision_uuid,
active_usr.display(),
target_path.display()
);
do_bind_mount(active_usr, &target_path, MountFlags::RDONLY)
.with_context(|| {
format!(
"Failed to bind mount '{}' to '{}' \
for ACL BTRFS UUID collision workaround",
active_usr.display(),
target_path.display(),
)
})?;
}
}
self.add_mount(target_path.clone());
return Ok(());
}
Expand Down Expand Up @@ -388,26 +422,107 @@ fn should_be_bind_mounted(fs_type: Option<RealFilesystemType>) -> bool {
// ACL constants and helpers are in the shared acl module.
use super::acl::{self, ACL_USR_A_PARTUUID, ACL_USR_B_PARTUUID};

/// Detects a BTRFS filesystem UUID collision on ACL's USR A/B partitions.
/// Minimum kernel version required for the BTRFS `temp_fsuid` mount option
/// (introduced in Linux 6.7). Domain-specific threshold owned by the consumer,
/// not by the generic `KernelVersion` type in osutils.
///
/// NOTE(review): confirm against the kernel's BTRFS documentation that
/// `temp_fsuid` is the exact mount-option spelling; upstream material appears
/// to call the 6.7 feature "temp-fsid" — TODO verify.
const BTRFS_TEMP_FSUID_MIN_KERNEL: osutils::uname::KernelVersion =
osutils::uname::KernelVersion { major: 6, minor: 7 };

/// How to resolve a BTRFS UUID collision on ACL's USR A/B partitions.
///
/// Both variants carry the colliding filesystem UUID.
#[derive(Debug)]
enum AclBtrfsCollisionResolution {
/// Kernel >= 6.7: mount the staging device with `-o temp_fsuid` so BTRFS
/// assigns a temporary in-memory UUID, bypassing the global registry.
TempFsuid { collision_uuid: OsUuid },
/// Kernel < 6.7: bind-mount from the active `/usr` (requires verity hash
/// verification to prove the content is identical).
BindMountActiveUsr { collision_uuid: OsUuid },
}

impl AclBtrfsCollisionResolution {
    /// Borrows the colliding filesystem UUID carried by either variant.
    fn collision_uuid(&self) -> &OsUuid {
        // One arm per variant, so adding a variant forces this accessor to be revisited.
        match self {
            Self::TempFsuid {
                collision_uuid: uuid,
            } => uuid,
            Self::BindMountActiveUsr {
                collision_uuid: uuid,
            } => uuid,
        }
    }
}

/// Detects a BTRFS filesystem UUID collision on ACL's USR A/B partitions and
/// determines how to resolve it based on the running kernel version and
/// the `enableAzl4` internal parameter.
///
/// BTRFS maintains a kernel-global UUID registry and refuses to mount a filesystem
/// whose UUID is already registered by another mounted device. During A/B updates
/// where the COSI image shares filesystem UUIDs with the active OS, the staging
/// verity device cannot be mounted.
/// verity device cannot be mounted directly.
///
/// This function checks whether the active and update USR partitions (identified by
/// their well-known ACL PARTUUIDs) have the same BTRFS filesystem UUID. If so, it
/// returns the colliding UUID so the caller can substitute a bind mount from the
/// active `/usr`.
/// Resolution strategy:
/// - `enable_azl4` + Kernel ≥6.7: use `mount -o temp_fsuid` (mounts the real staging device)
/// - Otherwise: bind-mount from active `/usr` (requires verity hash match)
///
/// Returns `None` if:
/// - The system is not ACL (PARTUUIDs not found)
/// - The partitions don't have BTRFS filesystems
/// - The filesystem UUIDs are different (no collision)
fn detect_acl_btrfs_uuid_collision(
/// Returns `None` if no collision exists or if the bind-mount path is unsafe.
fn resolve_acl_btrfs_uuid_collision(
update_volume: AbVolumeSelection,
staging_usr_roothash: Option<&str>,
) -> Option<OsUuid> {
enable_azl4: bool,
) -> Option<AclBtrfsCollisionResolution> {
// 1. Detect whether a UUID collision exists.
let collision_uuid = detect_acl_btrfs_uuid_collision(update_volume)?;

// 2. Determine resolution strategy based on kernel version.
// The temp_fsuid path requires the enableAzl4 internal param to be set.
// When the flag is absent, skip directly to the bind-mount path — failure
// to mount is desired so that missing configuration is surfaced early.
if enable_azl4 {
let kernel_version = match osutils::uname::KernelVersion::running() {
Ok(kv) => kv,
Err(e) => {
// DR-003: distinguish uname execution failure from parse failure.
warn!("Failed to execute uname: {e}; cannot determine kernel version");
None
}
};

if let Some(kv) = kernel_version {
let supports_temp_fsuid = kv >= BTRFS_TEMP_FSUID_MIN_KERNEL;
debug!(
"Running kernel {}.{}, BTRFS temp_fsuid supported: {}",
kv.major, kv.minor, supports_temp_fsuid
);
if supports_temp_fsuid {
// Kernel ≥6.7: mount the staging device directly with temp_fsuid.
// Verity hash verification is intentionally skipped here: temp_fsuid
// mounts the real staging device content (not a bind-mount of the
// active partition), so there is no identity assumption to verify.
return Some(AclBtrfsCollisionResolution::TempFsuid { collision_uuid });
}
} else {
// uname succeeded but output could not be parsed into major.minor.
warn!(
"Could not parse kernel version from uname output; \
falling back to bind-mount strategy for ACL BTRFS UUID collision"
);
}
}

// 3. Kernel <6.7, unknown kernel, or enableAzl4 not set: bind-mount from
// active /usr. This requires verity hash verification to prove content
// is identical.
if !verify_acl_bind_mount_safety(staging_usr_roothash) {
return None;
}

Some(AclBtrfsCollisionResolution::BindMountActiveUsr { collision_uuid })
}

/// Detects a BTRFS filesystem UUID collision on ACL's USR A/B partitions.
///
/// Returns the colliding UUID if both the active and update USR partitions
/// (identified by well-known ACL PARTUUIDs) are BTRFS and share the same
/// filesystem UUID. Returns `None` otherwise.
fn detect_acl_btrfs_uuid_collision(update_volume: AbVolumeSelection) -> Option<OsUuid> {
let (active_partuuid, update_partuuid) = match update_volume {
AbVolumeSelection::VolumeA => (ACL_USR_B_PARTUUID, ACL_USR_A_PARTUUID),
AbVolumeSelection::VolumeB => (ACL_USR_A_PARTUUID, ACL_USR_B_PARTUUID),
Expand Down Expand Up @@ -438,18 +553,17 @@ fn detect_acl_btrfs_uuid_collision(
share filesystem UUID '{active_uuid}'"
);

// When a staging root hash is available, verify that the active USR
// partition has the same verity root hash. This provides a cryptographic
// guarantee that the filesystems are byte-identical, not just a UUID match.
Some(active_uuid)
}

/// Verifies that bind-mounting from the active `/usr` is safe by comparing
/// verity root hashes. Returns true if the hashes match, false otherwise.
fn verify_acl_bind_mount_safety(staging_usr_roothash: Option<&str>) -> bool {
let Some(staging_hash) = staging_usr_roothash else {
// No staging hash available — cannot verify content identity.
// Refusing the bind-mount is the safe choice: proceeding without
// verification could mount different content at /usr.
warn!(
"No staging USR verity root hash provided. \
Refusing bind-mount — cannot verify content identity."
);
return None;
// No staging hash available — can't verify, but allow the bind mount
// since the upstream validation (validate_acl_duplicate_uuid) already
// verified the hashes match when they were available.
return true;
};

match acl::read_active_usr_roothash() {
Expand All @@ -460,26 +574,25 @@ fn detect_acl_btrfs_uuid_collision(
partitions have matching root hash ({}...)",
acl::hash_preview(staging_hash)
);
true
} else {
warn!(
"Verity root hash mismatch: active USR has '{}...', staging has '{}...'. \
Refusing bind-mount despite UUID collision.",
acl::hash_preview(&active_hash),
acl::hash_preview(staging_hash)
);
return None;
false
}
}
None => {
warn!(
"Cannot read active USR verity root hash from /proc/cmdline. \
Refusing bind-mount despite UUID collision."
);
return None;
false
}
}

Some(active_uuid)
}

/// Returns an ordered map of mount points to their corresponding FileSystem objects.
Expand Down
6 changes: 6 additions & 0 deletions crates/trident_api/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,12 @@ pub mod internal_params {
/// Run dracut in debug mode to capture more output.
pub const DRACUT_DEBUG: &str = "dracutDebug";

/// Enable Azure Linux 4 specific behaviors. Gates features that depend on
/// AZL4 kernel capabilities (e.g., BTRFS temp_fsuid mount option on
/// kernel ≥6.7). Must be explicitly set; absence means AZL4 codepaths
/// are not activated.
pub const ENABLE_AZL4: &str = "enableAzl4";

/// Enable support for Harpoon to query for updated Host Config documents.
pub const ENABLE_HARPOON_SUPPORT: &str = "harpoon";

Expand Down
Loading