diff --git a/README.md b/README.md index 794f77c1..c668f047 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ Usage: cargo rapx [OPTIONS] [-- [CARGO_FLAGS]] Commands: analyze perform various analyses on the crate, e.g., alias analysis, callgraph generation + audit audit unsafe APIs and output a JSON document check check potential vulnerabilities in the crate, e.g., use-after-free, memory leak help Print this message or the help of the given subcommand(s) @@ -62,6 +63,10 @@ Examples: cargo rapx check -f -m -- --tests 3. detect use-after-free and memory leak for all members: cargo rapx check -f -m -- --workspace +4. audit all public unsafe APIs in the current crate (outputs JSON to stderr): + cargo rapx audit unsafe-apis +5. audit all public unsafe APIs in the Rust standard library: + cargo rapx audit std-unsafe-apis Environment Variables (Values are case insensitive): diff --git a/rapx/src/analysis/audit/mod.rs b/rapx/src/analysis/audit/mod.rs new file mode 100644 index 00000000..3664a431 --- /dev/null +++ b/rapx/src/analysis/audit/mod.rs @@ -0,0 +1,280 @@ +/* + * This module implements the `audit unsafe-apis` and `audit std-unsafe-apis` commands. + * It collects all public unsafe functions from the current crate or the standard library + * and outputs them as JSON. 
+ */ + +use crate::analysis::utils::fn_info::{ + check_safety, check_visibility, get_all_std_fns_by_rustc_public, +}; +use rustc_hir::{ + ImplItemKind, PatKind, Safety, TraitFn, TraitItemKind, + def::DefKind, + def_id::{DefId, LOCAL_CRATE}, +}; +use rustc_middle::ty::TyCtxt; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct ParamInfo { + pub name: String, + pub ty: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct UnsafeApiEntry { + pub module: String, + pub name: String, + pub params: Vec<ParamInfo>, + pub safety_doc: Option<String>, +} + +/// Returns true if `line` is a Markdown heading that should stop content +/// collection for a `# Safety` section at the given `level` (1 or 2). +fn is_heading_stop(line: &str, level: usize) -> bool { + let is_h1 = line.starts_with("# ") || line == "#"; + if level == 1 { + return is_h1; + } + // level == 2: stop at `#` or `##` headings + is_h1 || line.starts_with("## ") || line == "##" +} + +/// Extract the `# Safety` or `## Safety` section from a Rust doc comment string. +/// +/// The `doc` parameter should be the concatenation of all `#[doc = "..."]` attribute +/// values joined by newlines, as returned by `attr.doc_str()`. +/// +/// Returns the text of the Safety section (with leading/trailing whitespace trimmed), +/// or `None` if no Safety section is present. +pub fn extract_safety_doc(doc: &str) -> Option<String> { + let lines: Vec<&str> = doc.lines().collect(); + let mut start_idx: Option<usize> = None; + let mut safety_level: usize = 0; + + for (i, line) in lines.iter().enumerate() { + let trimmed = line.trim(); + if trimmed == "# Safety" { + safety_level = 1; + start_idx = Some(i + 1); + break; + } else if trimmed == "## Safety" { + safety_level = 2; + start_idx = Some(i + 1); + break; + } + } + + let start = start_idx?; + + let mut content_lines: Vec<&str> = Vec::new(); + for line in lines.iter().skip(start) { + let trimmed = line.trim(); + // Stop at any heading at the same or higher level.
+ // For level 1 (`# Safety`), any `#` heading stops the section. + // For level 2 (`## Safety`), any `#` or `##` heading stops the section. + if is_heading_stop(trimmed, safety_level) { + break; + } + content_lines.push(trimmed); + } + + // Trim trailing empty lines + while content_lines.last().map_or(false, |l| l.is_empty()) { + content_lines.pop(); + } + // Trim leading empty lines + while content_lines.first().map_or(false, |l| l.is_empty()) { + content_lines.remove(0); + } + + let content = content_lines.join("\n"); + if content.is_empty() { None } else { Some(content) } +} + +pub struct AuditUnsafeApis<'tcx> { + tcx: TyCtxt<'tcx>, +} + +impl<'tcx> AuditUnsafeApis<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> Self { + Self { tcx } + } + + /// Run audit for the current (local) crate and print JSON to stderr. + pub fn run_local(&self) { + let entries = self.collect_local(); + match serde_json::to_string_pretty(&entries) { + Ok(json) => eprintln!("{}", json), + Err(e) => eprintln!("audit: JSON serialization error: {}", e), + } + } + + /// Run audit for the Rust standard library and print JSON to stderr. + pub fn run_std(&self) { + let entries = self.collect_std(); + match serde_json::to_string_pretty(&entries) { + Ok(json) => eprintln!("{}", json), + Err(e) => eprintln!("audit: JSON serialization error: {}", e), + } + } + + /// Collect doc comment text for a def_id by joining all `#[doc = "..."]` attrs. + fn get_doc_string(&self, def_id: DefId) -> String { + self.tcx + .get_all_attrs(def_id) + .iter() + .filter_map(|attr| attr.doc_str()) + .map(|sym| sym.as_str().to_string()) + .collect::<Vec<_>>() + .join("\n") + } + + /// Extract the `# Safety` section from the doc comment of a def_id. + fn get_safety_doc(&self, def_id: DefId) -> Option<String> { + extract_safety_doc(&self.get_doc_string(def_id)) + } + + /// Get parameter types from the function signature.
+ fn get_params(&self, def_id: DefId) -> Vec<ParamInfo> { + let fn_sig = self.tcx.fn_sig(def_id).instantiate_identity(); + let inputs = fn_sig.skip_binder().inputs(); + + // Try to get parameter names from HIR for local functions. + let param_names = self.get_hir_param_names(def_id); + + inputs + .iter() + .enumerate() + .map(|(i, ty)| { + let name = param_names + .get(i) + .cloned() + .unwrap_or_else(|| format!("arg{}", i)); + ParamInfo { + name, + ty: format!("{}", ty), + } + }) + .collect() + } + + /// Attempt to retrieve parameter names from the HIR body for a local function. + fn get_hir_param_names(&self, def_id: DefId) -> Vec<String> { + let Some(local_def_id) = def_id.as_local() else { + return Vec::new(); + }; + + let hir_node = self.tcx.hir_node_by_def_id(local_def_id); + let body_id = match hir_node { + rustc_hir::Node::Item(item) => { + if let rustc_hir::ItemKind::Fn { body, .. } = &item.kind { + Some(*body) + } else { + None + } + } + rustc_hir::Node::ImplItem(item) => { + if let ImplItemKind::Fn(_, body) = item.kind { + Some(body) + } else { + None + } + } + rustc_hir::Node::TraitItem(item) => { + if let TraitItemKind::Fn(_, TraitFn::Provided(body)) = item.kind { + Some(body) + } else { + None + } + } + _ => None, + }; + + if let Some(body_id) = body_id { + let body = self.tcx.hir_body(body_id); + body.params + .iter() + .map(|param| match &param.pat.kind { + PatKind::Binding(_, _, ident, _) => ident.name.as_str().to_string(), + _ => "_".to_string(), + }) + .collect() + } else { + Vec::new() + } + } + + /// Build an `UnsafeApiEntry` from a `DefId`. + fn make_entry(&self, def_id: DefId) -> UnsafeApiEntry { + let name = self.tcx.item_name(def_id).as_str().to_string(); + + let module = if let Some(local_def_id) = def_id.as_local() { + // For local items, build the module path as `crate_name[::parent_module]`.
+ let crate_name = self.tcx.crate_name(LOCAL_CRATE).as_str().to_string(); + let mod_local = self.tcx.parent_module_from_def_id(local_def_id); + let parent_path = self.tcx.def_path_str(mod_local.to_def_id()); + if parent_path.is_empty() { + crate_name + } else { + format!("{}::{}", crate_name, parent_path) + } + } else { + // For external items, derive the module by stripping the trailing `::name` + // component from the full qualified path. + let full_path = self.tcx.def_path_str(def_id); + if let Some(pos) = full_path.rfind("::") { + full_path[..pos].to_string() + } else { + full_path + } + }; + + UnsafeApiEntry { + module, + name, + params: self.get_params(def_id), + safety_doc: self.get_safety_doc(def_id), + } + } + + /// Collect all public unsafe `fn` and `AssocFn` items in the local crate. + fn collect_local(&self) -> Vec<UnsafeApiEntry> { + let mut entries = Vec::new(); + + for local_def_id in self.tcx.mir_keys(()) { + let def_id = local_def_id.to_def_id(); + let kind = self.tcx.def_kind(def_id); + if !matches!(kind, DefKind::Fn | DefKind::AssocFn) { + continue; + } + if !check_visibility(self.tcx, def_id) { + continue; + } + if check_safety(self.tcx, def_id) != Safety::Unsafe { + continue; + } + entries.push(self.make_entry(def_id)); + } + + entries + } + + /// Collect all public unsafe functions from the Rust standard library.
+ fn collect_std(&self) -> Vec<UnsafeApiEntry> { + let mut entries = Vec::new(); + + let all_std_fns = get_all_std_fns_by_rustc_public(self.tcx); + for def_id in all_std_fns { + if !self.tcx.visibility(def_id).is_public() { + continue; + } + if check_safety(self.tcx, def_id) != Safety::Unsafe { + continue; + } + entries.push(self.make_entry(def_id)); + } + + entries + } +} diff --git a/rapx/src/analysis/mod.rs b/rapx/src/analysis/mod.rs index f8bde8c2..160b957d 100644 --- a/rapx/src/analysis/mod.rs +++ b/rapx/src/analysis/mod.rs @@ -1,3 +1,4 @@ +pub mod audit; pub mod core; pub mod graphs; pub mod opt; diff --git a/rapx/src/cli.rs b/rapx/src/cli.rs index c73bfbb2..064e2cf2 100644 --- a/rapx/src/cli.rs +++ b/rapx/src/cli.rs @@ -27,6 +27,12 @@ pub enum Commands { #[command(subcommand)] kind: AnalysisKind, }, + /// audit unsafe APIs and output a JSON document + #[command(arg_required_else_help = true)] + Audit { + #[command(subcommand)] + kind: AuditKind, + }, /// check potential vulnerabilities in the crate, /// e.g., use-after-free, memory leak Check { @@ -112,6 +118,15 @@ pub enum AnalysisKind { DotMir, } +// use command string to automatically generate help messages +#[derive(Debug, Clone, Copy, Subcommand)] +pub enum AuditKind { + /// output all `pub unsafe` APIs of the current crate as JSON + UnsafeApis, + /// output all `pub unsafe` APIs of the Rust standard library as JSON + StdUnsafeApis, +} + impl RapxArgs { pub fn init_env(&self) { let Commands::Check { diff --git a/rapx/src/help.rs b/rapx/src/help.rs index 8c3db4a6..829989d1 100644 --- a/rapx/src/help.rs +++ b/rapx/src/help.rs @@ -16,6 +16,10 @@ will perform two kinds of detection in a row. cargo rapx check -f -m -- --tests 3. detect use-after-free and memory leak for all members: cargo rapx check -f -m -- --workspace +4. audit all public unsafe APIs in the current crate (outputs JSON to stderr): + cargo rapx audit unsafe-apis +5.
audit all public unsafe APIs in the Rust standard library: + cargo rapx audit std-unsafe-apis Environment Variables (Values are case insensitive): diff --git a/rapx/src/lib.rs b/rapx/src/lib.rs index 0b6fb9a4..e08bd72f 100644 --- a/rapx/src/lib.rs +++ b/rapx/src/lib.rs @@ -32,10 +32,11 @@ extern crate rustc_type_ir; extern crate thin_vec; use crate::{ analysis::{core::alias_analysis::mfp::MfpAliasAnalyzer, scan::ScanAnalysis}, - cli::{AliasStrategyKind, AnalysisKind, Commands, OptLevel, RapxArgs}, + cli::{AliasStrategyKind, AnalysisKind, AuditKind, Commands, OptLevel, RapxArgs}, }; use analysis::{ Analysis, + audit::AuditUnsafeApis, core::{ alias_analysis::{AliasAnalysis, FnAliasMapWrapper, default::AliasAnalyzer}, api_dependency::ApiDependencyAnalyzer, @@ -191,6 +192,15 @@ pub fn start_analyzer(tcx: TyCtxt, callback: &RapCallback) { } } + &Commands::Audit { kind } => match kind { + AuditKind::UnsafeApis => { + AuditUnsafeApis::new(tcx).run_local(); + } + AuditKind::StdUnsafeApis => { + AuditUnsafeApis::new(tcx).run_std(); + } + }, + &Commands::Analyze { kind } => match kind { AnalysisKind::Alias { strategy } => { let alias = match strategy { diff --git a/rapx/tests/audit/unsafe_apis_test/Cargo.lock b/rapx/tests/audit/unsafe_apis_test/Cargo.lock new file mode 100644 index 00000000..7142ffe3 --- /dev/null +++ b/rapx/tests/audit/unsafe_apis_test/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "unsafe_apis_test" +version = "0.1.0" diff --git a/rapx/tests/audit/unsafe_apis_test/Cargo.toml b/rapx/tests/audit/unsafe_apis_test/Cargo.toml new file mode 100644 index 00000000..e42d5cc6 --- /dev/null +++ b/rapx/tests/audit/unsafe_apis_test/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "unsafe_apis_test" +version = "0.1.0" +edition = "2021" diff --git a/rapx/tests/audit/unsafe_apis_test/src/lib.rs b/rapx/tests/audit/unsafe_apis_test/src/lib.rs new file mode 100644 index 00000000..ae84e9e7 --- /dev/null +++ b/rapx/tests/audit/unsafe_apis_test/src/lib.rs @@ -0,0 +1,21 @@ +/// Dereferences a raw pointer. +/// +/// # Safety +/// +/// The pointer must be valid and non-null. +pub unsafe fn deref_raw(ptr: *const u8) -> u8 { + *ptr +} + +pub struct MyStruct; + +impl MyStruct { + /// Creates an instance from a raw pointer. + /// + /// # Safety + /// + /// The pointer must point to a valid `MyStruct`. + pub unsafe fn from_raw(ptr: *mut MyStruct) -> &'static mut MyStruct { + &mut *ptr + } +} diff --git a/rapx/tests/tests.rs b/rapx/tests/tests.rs index 6c6783c9..4841dc7f 100644 --- a/rapx/tests/tests.rs +++ b/rapx/tests/tests.rs @@ -68,6 +68,7 @@ const ANALYZE_UPG_CMD: &[&str] = &["analyze", "upg"]; const ANALYZE_SSA_CMD: &[&str] = &["analyze", "ssa"]; const ANALYZE_RANGE_CMD: &[&str] = &["analyze", "range"]; const ANALYZE_CALLGRAPH_CMD: &[&str] = &["analyze", "callgraph"]; +const AUDIT_UNSAFE_APIS_CMD: &[&str] = &["audit", "unsafe-apis"]; // ================Dangling Pointer Detection Test===================== #[test] @@ -546,3 +547,13 @@ fn test_symbolic_interval() { ); } } + +#[test] +fn test_audit_unsafe_apis() { + let output = run_with_args("audit/unsafe_apis_test", AUDIT_UNSAFE_APIS_CMD); + // The audit command outputs JSON to stderr; check that key fields are present. 
+ assert_contain(&output, "\"name\""); + assert_contain(&output, "\"deref_raw\""); + assert_contain(&output, "\"safety_doc\""); + assert_contain(&output, "The pointer must be valid and non-null."); +}