From 9158815a6e64d5b8d2595900b5cc01a0e71821cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9D=D0=B8=D0=BA=D0=B8=D1=84=D0=BE=D1=80=D0=BE=D0=B2=20?= =?UTF-8?q?=D0=A1=D1=82=D0=B5=D0=BF=D0=B0=D0=BD?= Date: Mon, 12 Jan 2026 13:44:00 +0700 Subject: [PATCH 1/3] feat(security): dead column detection --- pil-analyzer/src/dead_column_checker.rs | 115 ++++++++++++++++++++++++ pil-analyzer/src/lib.rs | 2 + pil-analyzer/src/pil_analyzer.rs | 10 ++- pil-analyzer/src/statement_processor.rs | 2 - pil-analyzer/tests/dead_columns.rs | 84 +++++++++++++++++ 5 files changed, 208 insertions(+), 5 deletions(-) create mode 100644 pil-analyzer/src/dead_column_checker.rs create mode 100644 pil-analyzer/tests/dead_columns.rs diff --git a/pil-analyzer/src/dead_column_checker.rs b/pil-analyzer/src/dead_column_checker.rs new file mode 100644 index 0000000000..af05929ca7 --- /dev/null +++ b/pil-analyzer/src/dead_column_checker.rs @@ -0,0 +1,115 @@ +use std::collections::HashSet; + +use powdr_ast::analyzed::{AlgebraicExpression, Analyzed, PolyID, PolynomialType, PublicDeclaration}; +use powdr_number::FieldElement; +use powdr_parser_util::SourceRef; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DeadColumn { + pub name: String, + pub source: SourceRef, +} + +/// Returns committed/witness columns (including array elements) that are declared but never used +/// by any identity (constraints/lookups/permutations), nor by public declarations. +/// +/// Notes: +/// - Intermediate polynomials are inlined first, so columns used via intermediates are counted as used. +/// - Unused intermediate polynomials do NOT “rescue” witness columns from being considered dead. +pub fn dead_committed_columns(analyzed: &Analyzed) -> Vec { + let used_poly_ids = used_poly_ids(analyzed); + + // Get all committed polynomials, filtering out auto-added symbols and non-committed polynomials + let committed_polynomials = analyzed + .definitions + .iter() + .filter(|(name, (sym, _))| { + !analyzed.auto_added_symbols.contains(*name) + && matches!(sym.kind, powdr_ast::analyzed::SymbolKind::Poly(PolynomialType::Committed)) + }); + + // Filter all used polynomials from the committed polynomials + // Return vector of DeadColumn + committed_polynomials.flat_map(|(_name, (sym, _def))| { + sym.array_elements() + .map(|(elem_name, poly_id)| (elem_name, sym.source.clone(), poly_id)) + .collect::>() + }) + .filter(|(_name, _src, poly_id)| !used_poly_ids.contains(poly_id)) + .map(|(name, source, _poly_id)| DeadColumn { name, source }) + .collect() +} + +pub fn check_no_dead_committed_columns(analyzed: &Analyzed) -> Result<(), String> { + let dead = dead_committed_columns(analyzed); + if dead.is_empty() { + return Ok(()); + } + + let formatted = dead + .iter() + .map(|d| format!("- {} ({})", d.name, format_source(&d.source))) + .collect::>() + .join("\n"); + + Err(format!( + "Dead committed columns detected (declared but never referenced by any constraint/lookup/permutation/public declaration):\n{formatted}" + )) +} + +fn used_poly_ids(analyzed: &Analyzed) -> HashSet { + let mut used: HashSet = HashSet::new(); + + // Count usage from public declarations. + for decl in analyzed.public_declarations.values() { + used_from_public_declaration(decl, &mut used); + } + + // Count usage from identities, after inlining intermediates. + for identity in analyzed.identities_with_inlined_intermediate_polynomials() { + for expr in identity + .left + .selector + .iter() + .chain(identity.left.expressions.iter()) + .chain(identity.right.selector.iter()) + .chain(identity.right.expressions.iter()) + { + collect_poly_ids(expr, &mut used); + } + } + + used +} + +fn used_from_public_declaration(decl: &PublicDeclaration, used: &mut HashSet) { + // Note: poly_id is filled in during condensation. + if let Some(poly_id) = decl.polynomial.poly_id { + used.insert(poly_id); + } +} + +fn collect_poly_ids(expr: &AlgebraicExpression, used: &mut HashSet) { + match expr { + AlgebraicExpression::Reference(r) => { + used.insert(r.poly_id); + } + AlgebraicExpression::BinaryOperation(op) => { + collect_poly_ids(&op.left, used); + collect_poly_ids(&op.right, used); + } + AlgebraicExpression::UnaryOperation(op) => { + collect_poly_ids(&op.expr, used); + } + AlgebraicExpression::PublicReference(_) + | AlgebraicExpression::Challenge(_) + | AlgebraicExpression::Number(_) => {} + } +} + +fn format_source(source: &SourceRef) -> String { + let file = source.file_name.as_ref().map(|s| s.as_ref()).unwrap_or(""); + format!("{file}:{}..{}", source.start, source.end) +} + + diff --git a/pil-analyzer/src/lib.rs b/pil-analyzer/src/lib.rs index 325a27721f..3f93aec375 100644 --- a/pil-analyzer/src/lib.rs +++ b/pil-analyzer/src/lib.rs @@ -2,6 +2,7 @@ mod call_graph; mod condenser; +mod dead_column_checker; pub mod evaluator; pub mod expression_processor; mod pil_analyzer; @@ -24,6 +25,7 @@ use powdr_ast::{ }; pub use pil_analyzer::{analyze_ast, analyze_file, analyze_string}; +pub use dead_column_checker::{check_no_dead_committed_columns, dead_committed_columns, DeadColumn}; pub trait AnalysisDriver: Clone + Copy { /// Turns a declaration into an absolute name. diff --git a/pil-analyzer/src/pil_analyzer.rs b/pil-analyzer/src/pil_analyzer.rs index c09bb70dc9..a35297cde1 100644 --- a/pil-analyzer/src/pil_analyzer.rs +++ b/pil-analyzer/src/pil_analyzer.rs @@ -5,7 +5,7 @@ use std::iter::once; use std::path::{Path, PathBuf}; use std::str::FromStr; -use itertools::Itertools; + use powdr_ast::parsed::asm::{ parse_absolute_path, AbsoluteSymbolPath, SymbolPath, }; @@ -23,7 +23,7 @@ use powdr_ast::analyzed::{ use powdr_parser::{parse_type}; use crate::type_inference::{infer_types, ExpectedType}; -use crate::{side_effect_checker, AnalysisDriver}; +use crate::{AnalysisDriver, check_no_dead_committed_columns, side_effect_checker}; use crate::statement_processor::{Counters, PILItem, StatementProcessor}; use crate::{condenser, evaluator, expression_processor::ExpressionProcessor}; @@ -51,7 +51,11 @@ fn analyze(files: Vec) -> Analyzed { analyzer.process(files); analyzer.side_effect_check(); analyzer.type_check(); - analyzer.condense() + let analyzed = analyzer.condense(); + if let Err(e) = check_no_dead_committed_columns(&analyzed) { + eprintln!("Error checking for dead committed columns: {e}"); + } + analyzed } #[derive(Default)] diff --git a/pil-analyzer/src/statement_processor.rs b/pil-analyzer/src/statement_processor.rs index d742622137..76944b1940 100644 --- a/pil-analyzer/src/statement_processor.rs +++ b/pil-analyzer/src/statement_processor.rs @@ -375,8 +375,6 @@ where } }; - println!("ATTR IN STATEMENT PROCESSOR {attribute:?}"); - vec![PILItem::Identity(Identity { id: self.counters.dispense_identity_id(), attribute, diff --git a/pil-analyzer/tests/dead_columns.rs b/pil-analyzer/tests/dead_columns.rs new file mode 100644 index 0000000000..c02b958c9b --- /dev/null +++ b/pil-analyzer/tests/dead_columns.rs @@ -0,0 +1,84 @@ +use powdr_number::GoldilocksField; +use powdr_pil_analyzer::{analyze_string, dead_committed_columns}; + +#[test] +fn dead_committed_columns_basic() { + let input = r#" + namespace N(16); + pol commit a; + pol commit b; + a = 1; + "#; + + let analyzed = analyze_string::(input); + let dead = dead_committed_columns(&analyzed); + + assert!(!dead.is_empty(), "expected no dead columns, got: {dead:?}"); + assert!(dead.iter().any(|d| d.name == "N.b"), "expected N.b to be dead, got: {dead:?}"); + assert!( + !dead.iter().any(|d| d.name == "N.a"), + "expected N.a to be used, got: {dead:?}" + ); +} + + +#[test] +fn dead_commited_test_1() { + let input = r#" + pol commit secret_value; + "#; + let analyzed = analyze_string::(input); + let dead = dead_committed_columns(&analyzed); + + assert!(!dead.is_empty(), "expected dead columns, got: {dead:?}"); + assert!(dead.iter().any(|d| d.name == "secret_value"), "expected secret_value to be dead, got: {dead:?}"); +} + +#[test] +fn used_in_itermidiate_then_constrained() { + let input = r#" + pol commit sel; + pol commit expected; + pol commit raw_value; + pol PROCESSED = raw_value * raw_value; + sel * (PROCESSED - expected) = 0; // raw_value is constrained through PROCESSED + "#; + let analyzed = analyze_string::(input); + let dead = dead_committed_columns(&analyzed); + + assert!(dead.is_empty(), "expected no dead columns, got: {dead:?}"); +} + +#[test] +fn used_in_intermediate_then_not_constrained() { + let input = r#" + pol commit raw_value; + pol PROCESSED = raw_value * raw_value; + "#; + let analyzed = analyze_string::(input); + let dead = dead_committed_columns(&analyzed); + + assert!(!dead.is_empty(), "expected dead columns, got: {dead:?}"); + assert!(dead.iter().any(|d| d.name == "raw_value"), "expected raw_value to be dead, got: {dead:?}"); +} + +#[test] +fn used_as_lookup_key_or_destination_is_not_dead() { + let input = r#" + namespace N(16); + pol commit sel; + pol commit query_value; + pol commit precomputed_value; + + // If sel = 1, require query_value to appear in the "table" precomputed_value. + // This uses both committed columns only through the lookup identity. + sel { query_value } in precomputed_value { precomputed_value }; + "#; + let analyzed = analyze_string::(input); + let dead = dead_committed_columns(&analyzed); + + assert!( + dead.is_empty(), + "expected no dead columns (all are referenced by a lookup), got: {dead:?}" + ); +} From b7cb09c899dc6149cd3139892764bf6efaa6fbd6 Mon Sep 17 00:00:00 2001 From: defkit Date: Mon, 12 Jan 2026 14:00:37 +0700 Subject: [PATCH 2/3] remove the vibe --- pil-analyzer/src/dead_column_checker.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pil-analyzer/src/dead_column_checker.rs b/pil-analyzer/src/dead_column_checker.rs index af05929ca7..a6e6b59b84 100644 --- a/pil-analyzer/src/dead_column_checker.rs +++ b/pil-analyzer/src/dead_column_checker.rs @@ -12,10 +12,6 @@ pub struct DeadColumn { /// Returns committed/witness columns (including array elements) that are declared but never used /// by any identity (constraints/lookups/permutations), nor by public declarations. -/// -/// Notes: -/// - Intermediate polynomials are inlined first, so columns used via intermediates are counted as used. -/// - Unused intermediate polynomials do NOT “rescue” witness columns from being considered dead. pub fn dead_committed_columns(analyzed: &Analyzed) -> Vec { let used_poly_ids = used_poly_ids(analyzed); From 152b4b461dd188695a439b04924c5a918014b2c0 Mon Sep 17 00:00:00 2001 From: defkit Date: Mon, 12 Jan 2026 14:19:16 +0700 Subject: [PATCH 3/3] clean test --- pil-analyzer/tests/dead_columns.rs | 35 +++++++++++++++++++----------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/pil-analyzer/tests/dead_columns.rs b/pil-analyzer/tests/dead_columns.rs index c02b958c9b..a648766355 100644 --- a/pil-analyzer/tests/dead_columns.rs +++ b/pil-analyzer/tests/dead_columns.rs @@ -13,12 +13,8 @@ fn dead_committed_columns_basic() { let analyzed = analyze_string::(input); let dead = dead_committed_columns(&analyzed); - assert!(!dead.is_empty(), "expected no dead columns, got: {dead:?}"); - assert!(dead.iter().any(|d| d.name == "N.b"), "expected N.b to be dead, got: {dead:?}"); - assert!( - !dead.iter().any(|d| d.name == "N.a"), - "expected N.a to be used, got: {dead:?}" - ); + assert!(dead.len() == 1, "expected 1 dead column, got: {dead:?}"); + assert!(dead[0].name == "N.b", "expected N.b to be dead, got: {dead:?}"); } @@ -30,8 +26,21 @@ fn dead_commited_test_1() { let analyzed = analyze_string::(input); let dead = dead_committed_columns(&analyzed); - assert!(!dead.is_empty(), "expected dead columns, got: {dead:?}"); - assert!(dead.iter().any(|d| d.name == "secret_value"), "expected secret_value to be dead, got: {dead:?}"); + assert!(dead.len() == 1, "expected 1 dead column, got: {dead:?}"); + assert!(dead[0].name == "secret_value", "expected secret_value to be dead, got: {dead:?}"); +} + +#[test] +fn dead_commited_test_2() { + let input = r#" + pol commit secret_value; + pol commit other_value; + "#; + let analyzed = analyze_string::(input); + let dead = dead_committed_columns(&analyzed); + assert!(dead.len() == 2, "expected 1 dead column, got: {dead:?}"); + assert!(dead[0].name == "secret_value", "expected secret_value to be dead, got: {dead:?}"); + assert!(dead[1].name == "other_value", "expected other_value to be dead, got: {dead:?}"); } #[test] @@ -46,7 +55,7 @@ fn used_in_itermidiate_then_constrained() { let analyzed = analyze_string::(input); let dead = dead_committed_columns(&analyzed); - assert!(dead.is_empty(), "expected no dead columns, got: {dead:?}"); + assert!(dead.len() == 0, "expected no dead columns, got: {dead:?}"); } #[test] @@ -58,8 +67,8 @@ fn used_in_intermediate_then_not_constrained() { let analyzed = analyze_string::(input); let dead = dead_committed_columns(&analyzed); - assert!(!dead.is_empty(), "expected dead columns, got: {dead:?}"); - assert!(dead.iter().any(|d| d.name == "raw_value"), "expected raw_value to be dead, got: {dead:?}"); + assert!(dead.len() == 1, "expected 1 dead column, got: {dead:?}"); + assert!(dead[0].name == "raw_value", "expected raw_value to be dead, got: {dead:?}"); } #[test] @@ -78,7 +87,7 @@ fn used_as_lookup_key_or_destination_is_not_dead() { let dead = dead_committed_columns(&analyzed); assert!( - dead.is_empty(), - "expected no dead columns (all are referenced by a lookup), got: {dead:?}" + dead.len() == 0, + "expected no dead columns, got: {dead:?}" ); }