From 60019e5b157813dea62bba646f9ae1594219acac Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Sat, 14 Mar 2026 07:53:15 -0700 Subject: [PATCH 1/4] Make crucible-downstairs dump tolerant of MissingContextSlot errors When a raw extent is dirty and its block data does not match either stored context slot, opening the region fails with MissingContextSlot. Previously this caused the dump subcommand to exit with an error even if a healthy extent was requested. Refactor Region::open to share setup logic with a new open_for_dump function that uses try_open_extents internally. try_open_extents tolerates MissingContextSlot errors per extent, leaving failed extents as ExtentState::Closed and returning a list of (ExtentId, error) to the caller. dump_region now uses open_for_dump and checks the error list before proceeding: if the user requested a specific extent or block that maps to a failed extent, it returns an error; otherwise it prints a warning and skips the failed extent. show_extent and show_extent_block are updated similarly. verify_region is unchanged and still fails on any corrupt extent. Five tests are added covering: open_for_dump skipping a corrupt extent, dump succeeding when a corrupt extent is not requested, dump failing when a corrupt extent is specifically requested by extent number or block number, and dump of a good extent succeeding when another extent is corrupt. --- downstairs/src/dump.rs | 64 ++++++----- downstairs/src/region.rs | 229 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 254 insertions(+), 39 deletions(-) diff --git a/downstairs/src/dump.rs b/downstairs/src/dump.rs index 20a25a6e0..fc3b2fe76 100644 --- a/downstairs/src/dump.rs +++ b/downstairs/src/dump.rs @@ -98,13 +98,37 @@ pub fn dump_region( assert!(!region_dir.is_empty()); for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only - let region = Region::open(dir, false, true, &log)?; + // Open Region read only, tolerating MissingContextSlot errors + let (region, open_errors) = Region::open_for_dump(dir, &log)?; blocks_per_extent = region.def().extent_size().value; total_extents = region.def().extent_count(); - let max_block = total_extents as u64 * blocks_per_extent; + // Resolve a bare block argument to its extent before checking errors + if cmp_extent.is_none() { + if let Some(b) = block { + let max_block = total_extents as u64 * blocks_per_extent; + let ce = (b / blocks_per_extent) as u32; + if ce >= total_extents { + bail!( + "Requested block {} > max block {}", + b, + max_block - 1, + ); + } + cmp_extent = Some(ExtentId(ce)); + } + } + + // If any extent failed to open, decide whether to bail or warn. + for (eid, err) in &open_errors { + if cmp_extent == Some(*eid) { + // The user specifically requested this failed extent. + bail!("extent {} could not be opened: {}", eid, err); + } + println!("WARNING: extent {} skipped: {}", eid, err); + } + /* * The extent number is the index in the overall hashmap. * For each entry in all_extents hashmap, we have an ExtInfo @@ -114,30 +138,12 @@ pub fn dump_region( for e in ®ion.extents { let e = match e { extent::ExtentState::Opened(extent) => extent, - extent::ExtentState::Closed => panic!("dump on closed extent!"), + // Closed here means the extent failed to open; we already + // warned above, so just skip it. + extent::ExtentState::Closed => continue, }; let en = e.number(); - /* - * If we want just a specific block, then figure out what extent - * that block belongs to so we can just display the - * requested information. We only need to do this - * once. - */ - if cmp_extent.is_none() { - if let Some(b) = block { - let ce = (b / blocks_per_extent) as u32; - if ce >= total_extents { - bail!( - "Requested block {} > max block {}", - b, - max_block - 1, - ); - } - cmp_extent = Some(ExtentId(ce)); - } - } - /* * If we are looking at one extent in detail, we skip all the * others. @@ -583,8 +589,9 @@ fn show_extent( * in the Vec based on index. */ for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only - let mut region = Region::open(dir, false, true, &log)?; + // Open Region read only, tolerating MissingContextSlot on + // extents other than the one we're comparing. + let (mut region, _) = Region::open_for_dump(dir, &log)?; let response = region.region_read( &RegionReadRequest(vec![RegionReadReq { @@ -695,8 +702,9 @@ fn show_extent_block( * in the Vec based on index. */ for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only - let mut region = Region::open(dir, false, true, &log)?; + // Open Region read only, tolerating MissingContextSlot on + // extents other than the one we're comparing. + let (mut region, _) = Region::open_for_dump(dir, &log)?; let response = region.region_read( &RegionReadRequest(vec![RegionReadReq { diff --git a/downstairs/src/region.rs b/downstairs/src/region.rs index 32b09aea0..0b1e30026 100644 --- a/downstairs/src/region.rs +++ b/downstairs/src/region.rs @@ -248,10 +248,9 @@ impl Region { Ok(region) } - /** - * Open an existing region file - */ - pub fn open>( + /// Read region config and validate versions, returning a `Region` with + /// an empty extents Vec. Callers must open extents separately. + fn setup>( dir: P, verbose: bool, read_only: bool, @@ -307,10 +306,7 @@ impl Region { .num_threads(WORKER_POOL_SIZE) .build()?; - /* - * Open every extent that presently exists. - */ - let mut region = Region { + Ok(Region { dir: dir.as_ref().to_path_buf(), def, extents: Vec::new(), @@ -318,13 +314,42 @@ impl Region { read_only, log: log.clone(), pool, - }; + }) + } + /** + * Open an existing region file + */ + pub fn open>( + dir: P, + verbose: bool, + read_only: bool, + log: &Logger, + ) -> Result { + let mut region = Self::setup(dir, verbose, read_only, log)?; region.open_extents()?; - Ok(region) } + /// Open an existing region read-only, tolerating + /// [`CrucibleError::MissingContextSlot`] errors on individual extents. + /// + /// Returns the opened region along with a Vec of `(ExtentId, error)` for + /// any extents that could not be opened. Those extents are left as + /// [`ExtentState::Closed`] in the region; callers must check the error + /// Vec before accessing them. + /// + /// Any error other than `MissingContextSlot` (e.g. missing files, I/O + /// errors, version mismatches) is still fatal. + pub fn open_for_dump>( + dir: P, + log: &Logger, + ) -> Result<(Region, Vec<(ExtentId, CrucibleError)>)> { + let mut region = Self::setup(dir, false, true, log)?; + let errors = region.try_open_extents()?; + Ok((region, errors)) + } + pub fn encrypted(&self) -> bool { self.def.get_encrypted() } @@ -375,6 +400,48 @@ impl Region { Ok(()) } + /// Like [`open_extents`], but tolerates + /// [`CrucibleError::MissingContextSlot`] on individual extents. + /// + /// Extents that fail with that error are stored as + /// [`ExtentState::Closed`] (preserving Vec indices) and the error is + /// collected into the returned Vec. Any other error causes an + /// immediate failure. + fn try_open_extents(&mut self) -> Result> { + let next_eid = self.extents.len() as u32; + let mut errors: Vec<(ExtentId, CrucibleError)> = Vec::new(); + + for eid in (next_eid..self.def.extent_count()).map(ExtentId) { + match Extent::open( + &self.dir, + &self.def, + eid, + self.read_only, + &self.log, + ) { + Ok(extent) => { + if extent.dirty() { + self.dirty_extents.insert(eid); + } + self.extents.push(ExtentState::Opened(extent)); + } + Err(e) => match e.downcast::() { + Ok(ce @ CrucibleError::MissingContextSlot { .. }) => { + errors.push((eid, ce)); + self.extents.push(ExtentState::Closed); + } + Ok(other) => return Err(other.into()), + Err(e) => return Err(e), + }, + } + } + // We intentionally leave failed extents as Closed, so we cannot + // use check_extents() here. Verify only the count. + assert_eq!(self.def.extent_count() as usize, self.extents.len()); + + Ok(errors) + } + /// Creates `self.extent_count` extent files and opens them fn create_extents(&mut self, backend: Backend) -> Result<()> { let next_eid = self.extents.len() as u32; @@ -1263,7 +1330,8 @@ pub async fn save_stream_to_file( pub(crate) mod test { use bytes::Bytes; use itertools::Itertools; - use std::fs::rename; + use std::fs::{OpenOptions, rename}; + use std::io::{Seek, SeekFrom}; use std::path::PathBuf; use rand::RngCore; @@ -2164,6 +2232,145 @@ pub(crate) mod test { .unwrap(); } + /// Write block 0 of `eid` twice with different data so that both + /// context slots (A and B) are populated. The extent is left dirty + /// (no flush), which is required to trigger the recomputation path on + /// the next open. + fn fill_both_context_slots(region: &mut Region, eid: ExtentId) { + for byte in [0x00u8, 0x01u8] { + let data = Bytes::from(vec![byte; 512]); + let hash = integrity_hash(&[&data[..]]); + let write = RegionWriteReq { + extent: eid, + write: ExtentWrite { + offset: BlockOffset(0), + data, + block_contexts: vec![BlockContext { + encryption_context: None, + hash, + }], + }, + }; + region + .region_write(&RegionWrite(vec![write]), JobId(0), false) + .unwrap(); + } + } + + /// Overwrite block 0 of `eid` on disk with 0xFF bytes so that neither + /// stored context slot hash can match the actual block data. + fn corrupt_block_zero(dir: &Path, eid: ExtentId) { + let path = extent_path(dir, eid); + let mut file = OpenOptions::new().write(true).open(path).unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.write_all(&[0xFFu8; 512]).unwrap(); + } + + /// Create a 3-extent raw region (10 blocks each) where extent 1 has a + /// corrupted block 0: both context slots are populated but neither + /// matches the on-disk data. Returns the persisted directory path. + fn make_region_with_corrupt_extent() -> PathBuf { + let dir = tempdir().unwrap(); + let mut region = + Region::create(&dir, new_region_options(), csl()).unwrap(); + region.extend(3, Backend::RawFile).unwrap(); + + fill_both_context_slots(&mut region, ExtentId(1)); + + let dir_path = dir.path().to_path_buf(); + // Drop without flushing so the dirty flag stays set on disk. + drop(region); + + corrupt_block_zero(&dir_path, ExtentId(1)); + + // keep() persists the directory past the end of the test. + dir.keep() + } + + /// open_for_dump succeeds on a region with a corrupted extent, returning + /// the extent in the error list and leaving it Closed in the region. + #[test] + fn open_for_dump_skips_missing_context_slot() { + let dir_path = make_region_with_corrupt_extent(); + let (region, errors) = + Region::open_for_dump(&dir_path, &csl()).unwrap(); + + assert_eq!(errors.len(), 1); + let (eid, err) = &errors[0]; + assert_eq!(*eid, ExtentId(1)); + assert!( + matches!(err, CrucibleError::MissingContextSlot { .. }), + "unexpected error: {err}" + ); + + assert!(matches!(region.extents[0], ExtentState::Opened(_))); + assert!(matches!(region.extents[1], ExtentState::Closed)); + assert!(matches!(region.extents[2], ExtentState::Opened(_))); + } + + /// dump_region with no specific extent succeeds even when one extent is + /// corrupted; it prints a warning and skips the bad extent. + #[test] + fn dump_region_skips_corrupt_extent() { + let dir_path = make_region_with_corrupt_extent(); + dump_region(vec![dir_path], None, None, false, false, csl()).unwrap(); + } + + /// dump_region fails when the user explicitly requests the corrupted + /// extent with -e. + #[test] + fn dump_region_errors_on_requested_corrupt_extent() { + let dir_path = make_region_with_corrupt_extent(); + let result = dump_region( + vec![dir_path], + Some(ExtentId(1)), + None, + false, + false, + csl(), + ); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("extent 1 could not be opened"), + "unexpected error: {err}" + ); + } + + /// dump_region fails when the user requests a block (-b) that falls + /// inside the corrupted extent. + #[test] + fn dump_region_errors_on_block_in_corrupt_extent() { + let dir_path = make_region_with_corrupt_extent(); + // extent_size = 10, so blocks 10-19 belong to extent 1; use block 15. + let result = + dump_region(vec![dir_path], None, Some(15), false, false, csl()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("extent 1 could not be opened"), + "unexpected error: {err}" + ); + } + + /// dump_region succeeds when requesting a good extent even though a + /// different extent in both regions has a corrupted block. + #[test] + fn dump_good_extent_with_corrupt_extent_in_region() { + let dir1 = make_region_with_corrupt_extent(); + let dir2 = make_region_with_corrupt_extent(); + + // Request extent 2, which is intact in both regions. The bad + // block in extent 1 should produce only a warning. + dump_region( + vec![dir1, dir2], + Some(ExtentId(2)), + None, + false, + false, + csl(), + ) + .unwrap(); + } + /// Read block data from raw files on disk fn read_file_data( ddef: RegionDefinition, From ecb447446fa06cdd46e3338a00c51e9b81fb77bd Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 16 Mar 2026 13:13:31 -0700 Subject: [PATCH 2/4] Make open_for_dump crate pub --- downstairs/src/region.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/downstairs/src/region.rs b/downstairs/src/region.rs index 0b1e30026..b91aea640 100644 --- a/downstairs/src/region.rs +++ b/downstairs/src/region.rs @@ -341,7 +341,7 @@ impl Region { /// /// Any error other than `MissingContextSlot` (e.g. missing files, I/O /// errors, version mismatches) is still fatal. - pub fn open_for_dump>( + pub(crate) fn open_for_dump>( dir: P, log: &Logger, ) -> Result<(Region, Vec<(ExtentId, CrucibleError)>)> { From 2554368ab0a49aeb08be7384fb6fd41d2dea6c86 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 16 Mar 2026 13:37:23 -0700 Subject: [PATCH 3/4] PR comments, duplicate less code --- downstairs/src/region.rs | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/downstairs/src/region.rs b/downstairs/src/region.rs index b91aea640..3ea69c644 100644 --- a/downstairs/src/region.rs +++ b/downstairs/src/region.rs @@ -378,26 +378,12 @@ impl Region { /// /// Returns an error if extent files are missing. fn open_extents(&mut self) -> Result<()> { - let next_eid = self.extents.len() as u32; - - let eid_range = next_eid..self.def.extent_count(); - for eid in eid_range.map(ExtentId) { - let extent = Extent::open( - &self.dir, - &self.def, - eid, - self.read_only, - &self.log, - )?; - - if extent.dirty() { - self.dirty_extents.insert(eid); - } - self.extents.push(ExtentState::Opened(extent)); + let errs = self.try_open_extents()?; + if let Some((_id, err)) = errs.first() { + Err(err.clone().into()) + } else { + Ok(()) } - self.check_extents(); - - Ok(()) } /// Like [`open_extents`], but tolerates From 4d4cc95d87dca59ffda6995ddca82a3fb397461e Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 16 Mar 2026 14:12:27 -0700 Subject: [PATCH 4/4] Addressing PR comments Remove wrong comment, but also check the result for open_for_dump and handle it better if we can't open our extent instead of a panic. --- downstairs/src/dump.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/downstairs/src/dump.rs b/downstairs/src/dump.rs index d6daa30bb..7da98b2e1 100644 --- a/downstairs/src/dump.rs +++ b/downstairs/src/dump.rs @@ -589,9 +589,12 @@ fn show_extent( * in the Vec based on index. */ for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only, tolerating MissingContextSlot on - // extents other than the one we're comparing. - let (mut region, _) = Region::open_for_dump(dir, &log)?; + let (mut region, open_errors) = Region::open_for_dump(dir, &log)?; + if let Some((_eid, err)) = + open_errors.iter().find(|(e, _)| *e == cmp_extent) + { + bail!("extent {} could not be opened: {}", cmp_extent, err); + } let response = region.region_read( &RegionReadRequest(vec![RegionReadReq { @@ -702,9 +705,12 @@ fn show_extent_block( * in the Vec based on index. */ for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only, tolerating MissingContextSlot on - // extents other than the one we're comparing. - let (mut region, _) = Region::open_for_dump(dir, &log)?; + let (mut region, open_errors) = Region::open_for_dump(dir, &log)?; + if let Some((_eid, err)) = + open_errors.iter().find(|(e, _)| *e == cmp_extent) + { + bail!("extent {} could not be opened: {}", cmp_extent, err); + } let response = region.region_read( &RegionReadRequest(vec![RegionReadReq {