diff --git a/downstairs/src/dump.rs b/downstairs/src/dump.rs index 250e64588..7da98b2e1 100644 --- a/downstairs/src/dump.rs +++ b/downstairs/src/dump.rs @@ -98,13 +98,37 @@ pub fn dump_region( assert!(!region_dir.is_empty()); for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only - let region = Region::open(dir, false, true, &log)?; + // Open Region read only, tolerating MissingContextSlot errors + let (region, open_errors) = Region::open_for_dump(dir, &log)?; blocks_per_extent = region.def().extent_size().value; total_extents = region.def().extent_count(); - let max_block = total_extents as u64 * blocks_per_extent; + // Resolve a bare block argument to its extent before checking errors + if cmp_extent.is_none() { + if let Some(b) = block { + let max_block = total_extents as u64 * blocks_per_extent; + let ce = (b / blocks_per_extent) as u32; + if ce >= total_extents { + bail!( + "Requested block {} > max block {}", + b, + max_block - 1, + ); + } + cmp_extent = Some(ExtentId(ce)); + } + } + + // If any extent failed to open, decide whether to bail or warn. + for (eid, err) in &open_errors { + if cmp_extent == Some(*eid) { + // The user specifically requested this failed extent. + bail!("extent {} could not be opened: {}", eid, err); + } + println!("WARNING: extent {} skipped: {}", eid, err); + } + /* * The extent number is the index in the overall hashmap. * For each entry in all_extents hashmap, we have an ExtInfo @@ -114,30 +138,12 @@ pub fn dump_region( for e in ®ion.extents { let e = match e { extent::ExtentState::Opened(extent) => extent, - extent::ExtentState::Closed => panic!("dump on closed extent!"), + // Closed here means the extent failed to open; we already + // warned above, so just skip it. + extent::ExtentState::Closed => continue, }; let en = e.number(); - /* - * If we want just a specific block, then figure out what extent - * that block belongs to so we can just display the - * requested information. We only need to do this - * once. - */ - if cmp_extent.is_none() { - if let Some(b) = block { - let ce = (b / blocks_per_extent) as u32; - if ce >= total_extents { - bail!( - "Requested block {} > max block {}", - b, - max_block - 1, - ); - } - cmp_extent = Some(ExtentId(ce)); - } - } - /* * If we are looking at one extent in detail, we skip all the * others. @@ -583,8 +589,12 @@ fn show_extent( * in the Vec based on index. */ for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only - let mut region = Region::open(dir, false, true, &log)?; + let (mut region, open_errors) = Region::open_for_dump(dir, &log)?; + if let Some((_eid, err)) = + open_errors.iter().find(|(e, _)| *e == cmp_extent) + { + bail!("extent {} could not be opened: {}", cmp_extent, err); + } let response = region.region_read( &RegionReadRequest(vec![RegionReadReq { @@ -695,8 +705,12 @@ fn show_extent_block( * in the Vec based on index. */ for (index, dir) in region_dir.iter().enumerate() { - // Open Region read only - let mut region = Region::open(dir, false, true, &log)?; + let (mut region, open_errors) = Region::open_for_dump(dir, &log)?; + if let Some((_eid, err)) = + open_errors.iter().find(|(e, _)| *e == cmp_extent) + { + bail!("extent {} could not be opened: {}", cmp_extent, err); + } let response = region.region_read( &RegionReadRequest(vec![RegionReadReq { diff --git a/downstairs/src/region.rs b/downstairs/src/region.rs index 01dce10d1..6351d83ed 100644 --- a/downstairs/src/region.rs +++ b/downstairs/src/region.rs @@ -248,10 +248,9 @@ impl Region { Ok(region) } - /** - * Open an existing region file - */ - pub fn open>( + /// Read region config and validate versions, returning a `Region` with + /// an empty extents Vec. Callers must open extents separately. + fn setup>( dir: P, verbose: bool, read_only: bool, @@ -307,10 +306,7 @@ impl Region { .num_threads(WORKER_POOL_SIZE) .build()?; - /* - * Open every extent that presently exists. - */ - let mut region = Region { + Ok(Region { dir: dir.as_ref().to_path_buf(), def, extents: Vec::new(), @@ -318,13 +314,42 @@ impl Region { read_only, log: log.clone(), pool, - }; + }) + } + /** + * Open an existing region file + */ + pub fn open>( + dir: P, + verbose: bool, + read_only: bool, + log: &Logger, + ) -> Result { + let mut region = Self::setup(dir, verbose, read_only, log)?; region.open_extents()?; - Ok(region) } + /// Open an existing region read-only, tolerating + /// [`CrucibleError::MissingContextSlot`] errors on individual extents. + /// + /// Returns the opened region along with a Vec of `(ExtentId, error)` for + /// any extents that could not be opened. Those extents are left as + /// [`ExtentState::Closed`] in the region; callers must check the error + /// Vec before accessing them. + /// + /// Any error other than `MissingContextSlot` (e.g. missing files, I/O + /// errors, version mismatches) is still fatal. + pub(crate) fn open_for_dump>( + dir: P, + log: &Logger, + ) -> Result<(Region, Vec<(ExtentId, CrucibleError)>)> { + let mut region = Self::setup(dir, false, true, log)?; + let errors = region.try_open_extents()?; + Ok((region, errors)) + } + pub fn encrypted(&self) -> bool { self.def.get_encrypted() } @@ -353,26 +378,54 @@ impl Region { /// /// Returns an error if extent files are missing. fn open_extents(&mut self) -> Result<()> { + let errs = self.try_open_extents()?; + if let Some((_id, err)) = errs.first() { + Err(err.clone().into()) + } else { + Ok(()) + } + } + + /// Like [`open_extents`], but tolerates + /// [`CrucibleError::MissingContextSlot`] on individual extents. + /// + /// Extents that fail with that error are stored as + /// [`ExtentState::Closed`] (preserving Vec indices) and the error is + /// collected into the returned Vec. Any other error causes an + /// immediate failure. + fn try_open_extents(&mut self) -> Result> { let next_eid = self.extents.len() as u32; + let mut errors: Vec<(ExtentId, CrucibleError)> = Vec::new(); - let eid_range = next_eid..self.def.extent_count(); - for eid in eid_range.map(ExtentId) { - let extent = Extent::open( + for eid in (next_eid..self.def.extent_count()).map(ExtentId) { + match Extent::open( &self.dir, &self.def, eid, self.read_only, &self.log, - )?; - - if extent.dirty() { - self.dirty_extents.insert(eid); + ) { + Ok(extent) => { + if extent.dirty() { + self.dirty_extents.insert(eid); + } + self.extents.push(ExtentState::Opened(extent)); + } + Err(e) => match e.downcast::() { + Ok(ce @ CrucibleError::MissingContextSlot { .. }) => { + errors.push((eid, ce)); + self.extents.push(ExtentState::Closed); + } + Ok(other) => return Err(other.into()), + Err(e) => return Err(e), + }, } - self.extents.push(ExtentState::Opened(extent)); } - self.check_extents(); + // We intentionally leave failed extents as Closed, so we cannot + // use check_extents() here. Verify only the count. + assert_eq!(self.def.extent_count() as usize, self.extents.len()); - Ok(()) + Ok(errors) } /// Creates `self.extent_count` extent files and opens them @@ -1263,7 +1316,8 @@ pub async fn save_stream_to_file( pub(crate) mod test { use bytes::Bytes; use itertools::Itertools; - use std::fs::rename; + use std::fs::{OpenOptions, rename}; + use std::io::{Seek, SeekFrom}; use std::path::PathBuf; use rand::RngCore; @@ -2164,6 +2218,145 @@ pub(crate) mod test { .unwrap(); } + /// Write block 0 of `eid` twice with different data so that both + /// context slots (A and B) are populated. The extent is left dirty + /// (no flush), which is required to trigger the recomputation path on + /// the next open. + fn fill_both_context_slots(region: &mut Region, eid: ExtentId) { + for byte in [0x00u8, 0x01u8] { + let data = Bytes::from(vec![byte; 512]); + let hash = integrity_hash(&[&data[..]]); + let write = RegionWriteReq { + extent: eid, + write: ExtentWrite { + offset: BlockOffset(0), + data, + block_contexts: vec![BlockContext { + encryption_context: None, + hash, + }], + }, + }; + region + .region_write(&RegionWrite(vec![write]), JobId(0), false) + .unwrap(); + } + } + + /// Overwrite block 0 of `eid` on disk with 0xFF bytes so that neither + /// stored context slot hash can match the actual block data. + fn corrupt_block_zero(dir: &Path, eid: ExtentId) { + let path = extent_path(dir, eid); + let mut file = OpenOptions::new().write(true).open(path).unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.write_all(&[0xFFu8; 512]).unwrap(); + } + + /// Create a 3-extent raw region (10 blocks each) where extent 1 has a + /// corrupted block 0: both context slots are populated but neither + /// matches the on-disk data. Returns the persisted directory path. + fn make_region_with_corrupt_extent() -> PathBuf { + let dir = tempdir().unwrap(); + let mut region = + Region::create(&dir, new_region_options(), csl()).unwrap(); + region.extend(3, Backend::RawFile).unwrap(); + + fill_both_context_slots(&mut region, ExtentId(1)); + + let dir_path = dir.path().to_path_buf(); + // Drop without flushing so the dirty flag stays set on disk. + drop(region); + + corrupt_block_zero(&dir_path, ExtentId(1)); + + // keep() persists the directory past the end of the test. + dir.keep() + } + + /// open_for_dump succeeds on a region with a corrupted extent, returning + /// the extent in the error list and leaving it Closed in the region. + #[test] + fn open_for_dump_skips_missing_context_slot() { + let dir_path = make_region_with_corrupt_extent(); + let (region, errors) = + Region::open_for_dump(&dir_path, &csl()).unwrap(); + + assert_eq!(errors.len(), 1); + let (eid, err) = &errors[0]; + assert_eq!(*eid, ExtentId(1)); + assert!( + matches!(err, CrucibleError::MissingContextSlot { .. }), + "unexpected error: {err}" + ); + + assert!(matches!(region.extents[0], ExtentState::Opened(_))); + assert!(matches!(region.extents[1], ExtentState::Closed)); + assert!(matches!(region.extents[2], ExtentState::Opened(_))); + } + + /// dump_region with no specific extent succeeds even when one extent is + /// corrupted; it prints a warning and skips the bad extent. + #[test] + fn dump_region_skips_corrupt_extent() { + let dir_path = make_region_with_corrupt_extent(); + dump_region(vec![dir_path], None, None, false, false, csl()).unwrap(); + } + + /// dump_region fails when the user explicitly requests the corrupted + /// extent with -e. + #[test] + fn dump_region_errors_on_requested_corrupt_extent() { + let dir_path = make_region_with_corrupt_extent(); + let result = dump_region( + vec![dir_path], + Some(ExtentId(1)), + None, + false, + false, + csl(), + ); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("extent 1 could not be opened"), + "unexpected error: {err}" + ); + } + + /// dump_region fails when the user requests a block (-b) that falls + /// inside the corrupted extent. + #[test] + fn dump_region_errors_on_block_in_corrupt_extent() { + let dir_path = make_region_with_corrupt_extent(); + // extent_size = 10, so blocks 10-19 belong to extent 1; use block 15. + let result = + dump_region(vec![dir_path], None, Some(15), false, false, csl()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("extent 1 could not be opened"), + "unexpected error: {err}" + ); + } + + /// dump_region succeeds when requesting a good extent even though a + /// different extent in both regions has a corrupted block. + #[test] + fn dump_good_extent_with_corrupt_extent_in_region() { + let dir1 = make_region_with_corrupt_extent(); + let dir2 = make_region_with_corrupt_extent(); + + // Request extent 2, which is intact in both regions. The bad + // block in extent 1 should produce only a warning. + dump_region( + vec![dir1, dir2], + Some(ExtentId(2)), + None, + false, + false, + csl(), + ) + .unwrap(); + } + /// Read block data from raw files on disk fn read_file_data( ddef: RegionDefinition,