@@ -3581,8 +3581,13 @@ impl Analyzer {
35813581 weak_points : & mut Vec < WeakPoint > ,
35823582 file_path : & str ,
35833583 ) -> Result < ( ) > {
3584- // Isabelle uses '(* *)' block comments only — no line comments.
3585- let code = strip_proof_comments ( content, "" , Some ( ( "(*" , "*)" ) ) ) ;
3584+ // Isabelle uses '(* *)' block comments AND `text \<open>...\<close>`
3585+ // prose declarations AND `@{text ...}` antiquotations. All three must
3586+ // be stripped before tactic-keyword counting: docstrings discussing
3587+ // "zero `sorry`" or "all `oops` placeholders" are valid prose, not
3588+ // proof-escape hatches. See #43.
3589+ let without_blocks = strip_proof_comments ( content, "" , Some ( ( "(*" , "*)" ) ) ) ;
3590+ let code = strip_isabelle_prose ( & without_blocks) ;
35863591 // sorry — Isabelle's admitted-proof escape hatch (banned estate-wide)
35873592 let sorry_count = code. matches ( "sorry" ) . count ( ) ;
35883593 if sorry_count > 0 {
@@ -5526,6 +5531,133 @@ fn strip_proof_comments(content: &str, line_marker: &str, block: Option<(&str, &
55265531 out
55275532}
55285533
5534+ /// Strip Isabelle prose constructs that are documentation, not proof code:
5535+ ///
5536+ /// 1. `@{text ...}` antiquotations — used inside prose blocks AND inside
5537+ /// `(* ... *)` comments to refer to proof tactics by name. Brace-balanced.
5538+ /// 2. Prose-block cartouches: `chapter`, `section`, `subsection`,
5539+ /// `subsubsection`, `paragraph`, and `text` followed by a
5540+ /// `\<open>...\<close>` cartouche. Cartouches nest, so depth-count.
5541+ ///
5542+ /// `(* ... *)` comments must be stripped separately via `strip_proof_comments`.
5543+ /// The order is significant: strip `(* *)` first, then this function, because
5544+ /// a `\<open>...\<close>` cartouche inside a `(* *)` comment is already gone.
5545+ ///
5546+ /// Conservative: only strips cartouches that follow a prose-block keyword.
5547+ /// Cartouches used as string literals inside tactics (rare) are kept.
5548+ fn strip_isabelle_prose ( content : & str ) -> String {
5549+ const OPEN : & str = "\\ <open>" ;
5550+ const CLOSE : & str = "\\ <close>" ;
5551+ const PROSE_KEYWORDS : [ & str ; 6 ] = [
5552+ "chapter" ,
5553+ "subsubsection" ,
5554+ "subsection" ,
5555+ "section" ,
5556+ "paragraph" ,
5557+ "text" ,
5558+ ] ;
5559+
5560+ let mut out = String :: with_capacity ( content. len ( ) ) ;
5561+ let mut rest = content;
5562+
5563+ loop {
5564+ let aq_pos = rest. find ( "@{text" ) ;
5565+ let mut prose_pos: Option < ( usize , usize ) > = None ;
5566+ for kw in & PROSE_KEYWORDS {
5567+ if let Some ( found) = find_prose_block ( rest, kw, OPEN ) {
5568+ prose_pos = match prose_pos {
5569+ Some ( ( existing, _) ) if existing <= found. 0 => prose_pos,
5570+ _ => Some ( found) ,
5571+ } ;
5572+ }
5573+ }
5574+
5575+ let next = match ( aq_pos, prose_pos) {
5576+ ( None , None ) => {
5577+ out. push_str ( rest) ;
5578+ return out;
5579+ }
5580+ ( Some ( a) , None ) => ( a, skip_antiquotation_end ( rest, a) ) ,
5581+ ( None , Some ( ( p_start, p_after_open) ) ) => {
5582+ ( p_start, skip_cartouche_end ( rest, p_after_open, OPEN , CLOSE ) )
5583+ }
5584+ ( Some ( a) , Some ( ( p_start, p_after_open) ) ) => {
5585+ if a <= p_start {
5586+ ( a, skip_antiquotation_end ( rest, a) )
5587+ } else {
5588+ ( p_start, skip_cartouche_end ( rest, p_after_open, OPEN , CLOSE ) )
5589+ }
5590+ }
5591+ } ;
5592+
5593+ out. push_str ( & rest[ ..next. 0 ] ) ;
5594+ out. push ( ' ' ) ;
5595+ rest = & rest[ next. 1 ..] ;
5596+ }
5597+ }
5598+
5599+ /// Find `keyword` (preceded by start-of-string or whitespace) followed by
5600+ /// optional whitespace + `open_marker`. Returns `(keyword_start, after_open)`.
5601+ fn find_prose_block ( haystack : & str , keyword : & str , open_marker : & str ) -> Option < ( usize , usize ) > {
5602+ let mut search_from = 0 ;
5603+ while let Some ( rel) = haystack[ search_from..] . find ( keyword) {
5604+ let abs = search_from + rel;
5605+ let preceded_ok = abs == 0
5606+ || haystack[ ..abs]
5607+ . chars ( )
5608+ . last ( )
5609+ . is_some_and ( |c| c. is_whitespace ( ) ) ;
5610+ let after_kw = abs + keyword. len ( ) ;
5611+ let after_ws: usize = haystack[ after_kw..]
5612+ . chars ( )
5613+ . take_while ( |c| c. is_whitespace ( ) )
5614+ . map ( char:: len_utf8)
5615+ . sum ( ) ;
5616+ let open_at = after_kw + after_ws;
5617+ if preceded_ok && haystack[ open_at..] . starts_with ( open_marker) {
5618+ return Some ( ( abs, open_at + open_marker. len ( ) ) ) ;
5619+ }
5620+ search_from = abs + keyword. len ( ) ;
5621+ }
5622+ None
5623+ }
5624+
5625+ /// Given an `@{text` start at `start`, find the byte just past the matching `}`.
5626+ fn skip_antiquotation_end ( haystack : & str , start : usize ) -> usize {
5627+ let after = start + "@{text" . len ( ) ;
5628+ let bytes = haystack. as_bytes ( ) ;
5629+ let mut depth: i32 = 1 ;
5630+ let mut j = after;
5631+ while j < bytes. len ( ) && depth > 0 {
5632+ match bytes[ j] {
5633+ b'{' => depth += 1 ,
5634+ b'}' => depth -= 1 ,
5635+ _ => { }
5636+ }
5637+ j += 1 ;
5638+ }
5639+ j
5640+ }
5641+
5642+ /// Given a position just past the first `\<open>` (depth = 1), scan forward and
5643+ /// return the byte index just past the matching `\<close>`. Nesting-aware.
5644+ fn skip_cartouche_end ( haystack : & str , after_open : usize , open : & str , close : & str ) -> usize {
5645+ let mut depth: i32 = 1 ;
5646+ let mut j = after_open;
5647+ while j < haystack. len ( ) && depth > 0 {
5648+ if haystack[ j..] . starts_with ( open) {
5649+ depth += 1 ;
5650+ j += open. len ( ) ;
5651+ } else if haystack[ j..] . starts_with ( close) {
5652+ depth -= 1 ;
5653+ j += close. len ( ) ;
5654+ } else {
5655+ j += 1 ;
5656+ }
5657+ }
5658+ j
5659+ }
5660+
55295661/// Count Rocq `Axiom`/`Parameter` declarations that represent genuine
55305662/// unverified postulates, excluding the two legitimate scaffold shapes:
55315663///
@@ -5737,6 +5869,91 @@ mod tests {
57375869 use std:: fs;
57385870 use tempfile:: TempDir ;
57395871
5872+ // ---------------------------------------------------------------
5873+ // 0. Isabelle prose-stripping (regression for #43 false positives)
5874+ // ---------------------------------------------------------------
5875+
5876+ #[ test]
5877+ fn isabelle_strip_text_antiquotation_inside_block_comment ( ) {
5878+ // Tropical_Kleene.thy:663 — sorry appears only in `@{text sorry}` antiquotation
5879+ let stripped = strip_isabelle_prose ( "(* All proofs are complete — zero @{text sorry}. *)" ) ;
5880+ // Outer (* *) is stripped separately; this helper handles the antiquotation
5881+ assert ! (
5882+ !stripped. contains( "sorry" ) ,
5883+ "@{{text sorry}} should be elided, got: {stripped:?}"
5884+ ) ;
5885+ }
5886+
5887+ #[ test]
5888+ fn isabelle_strip_cartouche_after_text_keyword ( ) {
5889+ // Tropical_CNO.thy:29 — text \<open>All sorry placeholders have been closed.\<close>
5890+ let src = "text \\ <open>All sorry placeholders have been closed.\\ <close>\n lemma foo: True by simp" ;
5891+ let stripped = strip_isabelle_prose ( src) ;
5892+ assert ! ( !stripped. contains( "sorry" ) , "got: {stripped:?}" ) ;
5893+ assert ! ( stripped. contains( "lemma foo" ) ) ;
5894+ }
5895+
5896+ #[ test]
5897+ fn isabelle_strip_section_cartouche ( ) {
5898+ let src = "section \\ <open>oops as a tactic\\ <close>\n lemma bar: True by simp" ;
5899+ let stripped = strip_isabelle_prose ( src) ;
5900+ assert ! ( !stripped. contains( "oops" ) ) ;
5901+ assert ! ( stripped. contains( "lemma bar" ) ) ;
5902+ }
5903+
5904+ #[ test]
5905+ fn isabelle_strip_subsection_subsubsection_paragraph_chapter ( ) {
5906+ for kw in [ "chapter" , "subsection" , "subsubsection" , "paragraph" ] {
5907+ let src = format ! ( "{kw} \\ <open>discuss sorry\\ <close>\n real_code" ) ;
5908+ let stripped = strip_isabelle_prose ( & src) ;
5909+ assert ! (
5910+ !stripped. contains( "sorry" ) ,
5911+ "{kw} cartouche not stripped: {stripped:?}"
5912+ ) ;
5913+ }
5914+ }
5915+
5916+ #[ test]
5917+ fn isabelle_preserve_real_sorry_outside_prose ( ) {
5918+ // A genuine `sorry` proof tactic must still be visible.
5919+ let src = "lemma broken: False\n sorry" ;
5920+ let stripped = strip_isabelle_prose ( src) ;
5921+ assert ! (
5922+ stripped. contains( "sorry" ) ,
5923+ "genuine sorry was wrongly stripped: {stripped:?}"
5924+ ) ;
5925+ }
5926+
5927+ #[ test]
5928+ fn isabelle_nested_cartouches ( ) {
5929+ // \<open> can nest. A cartouche containing another cartouche
5930+ // should be fully consumed.
5931+ let src = "text \\ <open>outer \\ <open>inner sorry\\ <close> tail\\ <close>\n code" ;
5932+ let stripped = strip_isabelle_prose ( src) ;
5933+ assert ! ( !stripped. contains( "sorry" ) ) ;
5934+ assert ! ( stripped. contains( "code" ) ) ;
5935+ }
5936+
5937+ #[ test]
5938+ fn isabelle_antiquotation_with_nested_braces ( ) {
5939+ let src = "text \\ <open>see @{text \" sorry { x = 1 }\" } below\\ <close>\n code" ;
5940+ let stripped = strip_isabelle_prose ( src) ;
5941+ assert ! ( !stripped. contains( "sorry" ) ) ;
5942+ assert ! ( stripped. contains( "code" ) ) ;
5943+ }
5944+
5945+ #[ test]
5946+ fn isabelle_keyword_not_at_word_boundary ( ) {
5947+ // `subsection` should match, but a function called `mysection`
5948+ // (no leading whitespace before "section") should NOT.
5949+ let src = "mysection_lemma: True by sorry -- real tactic\n " ;
5950+ let stripped = strip_isabelle_prose ( src) ;
5951+ assert ! (
5952+ stripped. contains( "sorry" ) ,
5953+ "false positive on identifier ending in 'section': {stripped:?}"
5954+ ) ;
5955+ }
5956+
57405957 // ---------------------------------------------------------------
57415958 // 1. Language::detect for different file extensions
57425959 // ---------------------------------------------------------------
0 commit comments