diff --git a/downstairs/src/dump.rs b/downstairs/src/dump.rs
index 3d9a89d71..78db92e01 100644
--- a/downstairs/src/dump.rs
+++ b/downstairs/src/dump.rs
@@ -1,6 +1,7 @@
 // Copyright 2021 Oxide Computer Company
 use super::*;
 use crate::extent::ExtentMeta;
+use rayon::prelude::*;
 use std::convert::TryInto;
 
 use sha2::{Digest, Sha256};
@@ -10,6 +11,47 @@ struct ExtInfo {
     ei_hm: HashMap,
 }
 
+/// Verifies every extent of the region found in `region_dir`.
+///
+/// The region is opened with `Region::open`, then `validate` is run on each
+/// extent through rayon's parallel iterator. Every failing extent is
+/// printed to stdout before the function returns.
+///
+/// # Errors
+///
+/// Returns an error if the region cannot be opened, or if one or more
+/// extents fail validation.
+///
+/// # Panics
+///
+/// Panics if the opened region contains a closed extent.
+pub fn verify_region(region_dir: PathBuf, log: Logger) -> Result<()> {
+    let region = Region::open(region_dir, false, true, &log)?;
+    let errors: Vec<_> = region
+        .extents
+        .par_iter()
+        .filter_map(|e| {
+            let extent = match e {
+                extent::ExtentState::Opened(extent) => extent,
+                extent::ExtentState::Closed => {
+                    panic!("verify on closed extent!")
+                }
+            };
+
+            // Keep only the failures, tagged with their extent number
+            extent.validate().err().map(|err| (extent.number, err))
+        })
+        .collect();
+
+    if !errors.is_empty() {
+        for (number, err) in &errors {
+            println!("validation failed for extent {}: {:?}", number, err);
+        }
+        bail!("Region failed to verify");
+    }
+    Ok(())
+}
+
 /*
  * Dump the metadata for one or more region directories.
  *
diff --git a/downstairs/src/extent.rs b/downstairs/src/extent.rs
index 7d255bea5..4bba34762 100644
--- a/downstairs/src/extent.rs
+++ b/downstairs/src/extent.rs
@@ -35,6 +35,9 @@ pub(crate) trait ExtentInner: Send + Sync + Debug {
     fn gen_number(&self) -> Result;
     fn flush_number(&self) -> Result;
     fn dirty(&self) -> Result;
+    /// Validates the extent data, returning an error if validation fails or
+    /// is not supported by the implementation
+    fn validate(&self) -> Result<(), CrucibleError>;
 
     /// Performs any metadata updates needed before a flush
     fn pre_flush(
@@ -511,6 +514,12 @@ impl Extent {
         Ok((gen, flush, dirty))
     }
 
+    /// Validates the extent data by delegating to the inner extent
+    /// implementation
+    pub fn validate(&self) -> Result<(), CrucibleError> {
+        self.inner.validate()
+    }
+
     /**
      * Create an extent at the location requested.
      * Start off with the default meta data.
diff --git a/downstairs/src/extent_inner_raw.rs b/downstairs/src/extent_inner_raw.rs
index 2ba1ef6ad..715630ae4 100644
--- a/downstairs/src/extent_inner_raw.rs
+++ b/downstairs/src/extent_inner_raw.rs
@@ -581,6 +581,91 @@ impl ExtentInner for RawInner {
         r
     }
 
+    /// Checks every block of the extent against its active context slot.
+    ///
+    /// Block data is read from the file in bulk (up to 128 KiB per read) and
+    /// hashed one block at a time. A block passes if its hash equals the
+    /// `on_disk_hash` of its active context slot, or if the active slot is
+    /// empty and the block contains only zeros.
+    ///
+    /// Returns an error for the first block that fails, or if reading the
+    /// context slots or the block data fails.
+    fn validate(&self) -> Result<(), CrucibleError> {
+        let block_size = self.extent_size.block_size_in_bytes() as usize;
+
+        // Read context data to local arrays
+        let ctx_a = self.layout.read_context_slots_contiguous(
+            &self.file,
+            0,
+            self.layout.block_count(),
+            ContextSlot::A,
+        )?;
+        let ctx_b = self.layout.read_context_slots_contiguous(
+            &self.file,
+            0,
+            self.layout.block_count(),
+            ContextSlot::B,
+        )?;
+
+        // Read blocks in bulk, 128 KiB at a time.  Clamp to at least one
+        // block per read: a block size above 128 KiB would otherwise give
+        // a step of 0, and `step_by(0)` panics.
+        let nblocks = (128 * 1024 / block_size).max(1);
+        let mut buf = vec![0; block_size * nblocks];
+        for start_block in (0..self.extent_size.value).step_by(nblocks) {
+            let num_blocks =
+                ((self.extent_size.value - start_block) as usize).min(nblocks);
+
+            // Read the block data itself:
+            buf.resize(num_blocks * block_size, 0u8);
+            pread_all(
+                self.file.as_fd(),
+                &mut buf,
+                block_size as i64 * start_block as i64,
+            )
+            .map_err(|e| {
+                CrucibleError::IoError(format!(
+                    "extent {}: reading block {start_block} data failed: {e}",
+                    self.extent_number
+                ))
+            })?;
+
+            // Hash and check individual blocks against context slots
+            for (i, data) in buf.chunks_exact(block_size).enumerate() {
+                let block = start_block as usize + i;
+                let hash = integrity_hash(&[data]);
+
+                // Pick out the active context slot
+                let context = match self.active_context[block as u64] {
+                    ContextSlot::A => &ctx_a,
+                    ContextSlot::B => &ctx_b,
+                }[block];
+
+                if let Some(context) = context {
+                    if context.on_disk_hash == hash {
+                        // great work, everyone
+                    } else {
+                        return Err(CrucibleError::GenericError(format!(
+                            "block {block} has an active slot \
+                             with mismatched hash"
+                        )));
+                    }
+                } else {
+                    // context slot is empty, hopefully data is as well!
+                    if data.iter().all(|v| *v == 0u8) {
+                        // great work, everyone
+                    } else {
+                        return Err(CrucibleError::GenericError(format!(
+                            "block {block} has an empty active slot, \
+                             but contains non-zero data",
+                        )));
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
     #[cfg(test)]
     fn set_dirty_and_block_context(
         &mut self,
diff --git a/downstairs/src/extent_inner_sqlite.rs b/downstairs/src/extent_inner_sqlite.rs
index e1d309c25..c9bdec223 100644
--- a/downstairs/src/extent_inner_sqlite.rs
+++ b/downstairs/src/extent_inner_sqlite.rs
@@ -103,6 +103,13 @@ impl ExtentInner for SqliteInner {
         Ok(bc)
     }
 
+    /// Not supported for SQLite-backed extents; always returns an error
+    fn validate(&self) -> Result<(), CrucibleError> {
+        Err(CrucibleError::GenericError(
+            "`validate` is not implemented for Sqlite extent".to_owned(),
+        ))
+    }
+
     #[cfg(test)]
     fn set_dirty_and_block_context(
         &mut self,
diff --git a/downstairs/src/lib.rs b/downstairs/src/lib.rs
index ce4b14f57..e7d692ca3 100644
--- a/downstairs/src/lib.rs
+++ b/downstairs/src/lib.rs
@@ -52,7 +52,7 @@ use extent::ExtentState;
 use region::Region;
 
 pub use admin::run_dropshot;
-pub use dump::dump_region;
+pub use dump::{dump_region, verify_region};
 pub use dynamometer::*;
 pub use stats::{DsCountStat, DsStatOuter};
 
diff --git a/downstairs/src/main.rs b/downstairs/src/main.rs
index 64b49942c..3b39579fa 100644
--- a/downstairs/src/main.rs
+++ b/downstairs/src/main.rs
@@ -218,6 +218,12 @@ enum Args {
         #[clap(long, default_value = "127.0.0.1:4567", action)]
         bind_addr: SocketAddr,
     },
+    /// Verify region
+    Verify {
+        /// Directory containing a region.
+        #[clap(short, long, value_name = "DIRECTORY", action)]
+        data: PathBuf,
+    },
     Version,
     /// Measure an isolated downstairs' disk usage
     Dynamometer {
@@ -465,6 +471,7 @@ async fn main() -> Result<()> {
 
             run_dropshot(bind_addr, &log).await
         }
+        Args::Verify { data } => verify_region(data, log),
         Args::Version => {
             let info = crucible_common::BuildInfo::default();
             println!("Crucible Version: {}", info);