Skip to content

Commit 3e06626

Browse files
committed
Detect (possibly incorrect) attempted procedure declarations
1 parent 28867a1 commit 3e06626

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed

src/parsing/parser.rs

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2352,6 +2352,43 @@ fn is_procedure_declaration(content: &str) -> bool {
23522352
}
23532353
}
23542354

2355+
/// Detects any line that could potentially be a procedure declaration,
/// including malformed ones. Used for detecting the end-boundary of a
/// procedure.
///
/// The specific motivating case for using this instead of the strict
/// is_procedure_declaration() is that a malformed attempted declaration like
///
/// ```text
/// MyProcedure :
/// ```
///
/// would be consumed as part of the previous procedure's body,
/// preventing us from attempting to parse it as a separate procedure and
/// reporting what turns out to be a better error.
///
/// A line matches when the text before its first ':' (ignoring surrounding
/// ASCII whitespace) is either
///
/// - a single identifier-ish token, made up only of ASCII letters, digits
///   and underscores; or
/// - such a token followed by a parameter list that opens with '(' and
///   closes with ')' immediately before the ':'.
///
/// Case and leading digits are deliberately not checked, so attempts such as
/// "MyProcedure :" or "123foo :" are detected. Multi-word prose such as
/// "Ask these questions: ..." or "Ask the user (politely): ..." is not.
/// Returns false when the line contains no ':' at all.
fn potential_procedure_declaration(content: &str) -> bool {
    // An attempted name is one non-empty token. Whitespace is not in the
    // accepted character set, so this also rules out multi-word sentences.
    fn identifier_ish(token: &str) -> bool {
        !token.is_empty()
            && token
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '_')
    }

    let Some((before, _after)) = content.split_once(':') else {
        return false;
    };
    let before = before.trim_ascii();

    match before.split_once('(') {
        // Has parameters: the name must be a single token and the parameter
        // list must be closed right before the ':'. The contents of the
        // parameter list are not inspected here.
        Some((name, params)) => identifier_ish(name.trim_ascii()) && params.ends_with(')'),
        // No parameters: the whole prefix must be a single token.
        None => identifier_ish(before),
    }
}
2391+
23552392
fn is_procedure_body(content: &str) -> bool {
23562393
let line = content.trim_ascii();
23572394

@@ -3501,6 +3538,72 @@ This is the first one.
35013538
}
35023539
}
35033540

3541+
#[test]
fn test_potential_procedure_declaration_is_superset() {
    // Every line the strict is_procedure_declaration() accepts must also be
    // accepted by the lenient potential_procedure_declaration().

    // Well-formed declarations, with and without parameters and with
    // surrounding whitespace: both checks accept these.
    let accepted_by_both = [
        "foo : A -> B",
        "my_proc :",
        "step123 : Input -> Output",
        "process(a, b) : X -> Y",
        "calc(x) :",
        " foo : ",
    ];
    for line in accepted_by_both {
        assert!(is_procedure_declaration(line), "{line:?}");
        assert!(potential_procedure_declaration(line), "{line:?}");
    }

    // Malformed attempts (capital letter, leading digit): only the lenient
    // check accepts these.
    let accepted_by_potential_only = ["MyProcedure :", "123foo :"];
    for line in accepted_by_potential_only {
        assert!(!is_procedure_declaration(line), "{line:?}");
        assert!(potential_procedure_declaration(line), "{line:?}");
    }

    // A sentence containing spaces, and a line with no name at all: neither
    // check accepts these.
    let accepted_by_neither = ["Ask these questions :", " :"];
    for line in accepted_by_neither {
        assert!(!is_procedure_declaration(line), "{line:?}");
        assert!(!potential_procedure_declaration(line), "{line:?}");
    }

    // Sweep a wider set of candidates; whenever the strict check accepts one,
    // the lenient check has to accept it as well.
    let candidates = [
        "a :",
        "z :",
        "abc :",
        "test_123 :",
        "foo_bar_baz :",
        "x() :",
        "func(a) :",
        "proc(a, b, c) :",
        "test(x,y,z) :",
        "a_1 :",
        "test_ :",
        "_test :", // Underscores
    ];
    let strictly_valid = candidates
        .iter()
        .copied()
        .filter(|case| is_procedure_declaration(case));
    for case in strictly_valid {
        assert!(
            potential_procedure_declaration(case),
            "potential_procedure_declaration must match all valid declarations: {}",
            case
        );
    }
}
3606+
35043607
#[test]
35053608
fn test_take_block_lines_procedure_wrapper() {
35063609
let mut input = Parser::new();
@@ -4459,6 +4562,52 @@ echo test
44594562
);
44604563
}
44614564

4565+
#[test]
fn parse_collecting_errors_basic() {
    let mut parser = Parser::new();

    // Valid content: no errors are collected, and the resulting document has
    // both a header and a body.
    parser.initialize("% technique v1\nvalid_proc : A -> B\n# Title\nDescription");
    let valid = parser.parse_collecting_errors();
    let error_count = valid.errors.len();
    assert_eq!(error_count, 0);
    let document = &valid.document;
    assert!(document.header.is_some());
    assert!(document.body.is_some());

    // Invalid header: at least one error is collected, one of them is an
    // InvalidHeader, and the document ends up without a header.
    parser.initialize("% wrong v1");
    let invalid = parser.parse_collecting_errors();
    let error_count = invalid.errors.len();
    assert!(error_count > 0);
    let saw_invalid_header = invalid
        .errors
        .iter()
        .any(|e| matches!(e, ParsingError::InvalidHeader(_)));
    assert!(saw_invalid_header);
    assert!(invalid.document.header.is_none());

    // Signature check: the method hands back a ParseResult rather than a
    // Result. The type annotation makes this a compile-time assertion.
    parser.initialize("some content");
    let _result: ParseResult = parser.parse_collecting_errors();
}
4610+
44624611
#[test]
44634612
fn test_multiple_error_collection() {
44644613
use std::path::Path;

0 commit comments

Comments
 (0)