Skip to content

Commit 72f4bd9

Browse files
authored
Improve parsing errors (#58)
Having gone to the trouble in #57 to present richer error messages, we've needed to go through some refinement to actually emit these appropriate errors in various scenarios. This branch also adds some preliminary efforts to detect malformed or invalid input in such a way that we can guess the user's intent and give a better error as a result.
2 parents 779ad37 + 34e9a9b commit 72f4bd9

File tree

4 files changed

+314
-11
lines changed

4 files changed

+314
-11
lines changed

src/parsing/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ pub fn parse<'i>(filename: &'i Path, content: &'i str) -> Document<'i> {
4444
} else {
4545
debug!("No content found");
4646
}
47+
eprintln!("Ok");
4748

4849
document
4950
}

src/parsing/parser.rs

Lines changed: 70 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ impl<'i> ParsingError<'i> {
110110
format!("Expected matching character '{}'", end),
111111
format!(
112112
r#"
113-
The parser was expecting {} enclosed by '{}' and '{}' but there was no more
114-
input remaining in the current scope.
113+
The parser was expecting {} enclosed by '{}' and '{}' but
114+
there was no more input remaining in the current scope.
115115
"#,
116116
subject, start, end
117117
)
@@ -477,8 +477,22 @@ impl<'i> Parser<'i> {
477477
}
478478

479479
procedures.push(procedure);
480+
} else if self
481+
.source
482+
.contains(':')
483+
{
484+
// It might be that we've encountered a malformed procedure
485+
// declaration, so we try parsing it anyway to get a more
486+
// specific error message.
487+
let _procedure = self.take_block_lines(
488+
|_| true, // Accept the line regardless
489+
|line| is_section(line) || is_procedure_declaration(line),
490+
|inner| inner.read_procedure(),
491+
)?;
492+
// If we reach here, read_procedure() succeeded but
493+
// is_procedure_declaration() failed, which is undefined.
494+
return Err(ParsingError::IllegalParserState(self.offset));
480495
} else {
481-
// TODO: Handle unexpected content properly
482496
return Err(ParsingError::Unrecognized(self.offset));
483497
}
484498
}
@@ -978,8 +992,9 @@ impl<'i> Parser<'i> {
978992
} else {
979993
let mut params = Vec::new();
980994
for item in list.split(',') {
981-
let param = validate_identifier(item.trim_ascii())
982-
.ok_or(ParsingError::Unrecognized(self.offset))?;
995+
let param = validate_identifier(item.trim_ascii()).ok_or(
996+
ParsingError::InvalidIdentifier(self.offset, item.trim_ascii()),
997+
)?;
983998
params.push(param);
984999
}
9851000
Some(params)
@@ -1068,13 +1083,24 @@ impl<'i> Parser<'i> {
10681083
if !steps.is_empty() {
10691084
elements.push(Element::Steps(steps));
10701085
}
1086+
} else if is_invalid_step_pattern(content) {
1087+
// Detect and reject invalid step patterns
1088+
return Err(ParsingError::InvalidStep(outer.offset));
10711089
} else {
10721090
// Handle descriptive text
10731091
let description = outer.take_block_lines(
10741092
|line| {
1075-
!is_step(line) && !is_procedure_title(line) && !is_code_block(line)
1093+
!is_step(line)
1094+
&& !is_procedure_title(line)
1095+
&& !is_code_block(line)
1096+
&& !is_invalid_step_pattern(line)
1097+
},
1098+
|line| {
1099+
is_step(line)
1100+
|| is_procedure_title(line)
1101+
|| is_code_block(line)
1102+
|| is_invalid_step_pattern(line)
10761103
},
1077-
|line| is_step(line) || is_procedure_title(line) || is_code_block(line),
10781104
|inner| {
10791105
let content = inner.source;
10801106
if !content.is_empty() {
@@ -1669,6 +1695,7 @@ impl<'i> Parser<'i> {
16691695
|| is_subsubstep_dependent(line)
16701696
|| is_role_assignment(line)
16711697
|| is_enum_response(line)
1698+
|| is_invalid_response_pattern(line)
16721699
|| is_code_block(line)
16731700
},
16741701
|outer| {
@@ -1700,6 +1727,12 @@ impl<'i> Parser<'i> {
17001727
if c == '{' {
17011728
let expression = parser.read_code_block()?;
17021729
content.push(Descriptive::CodeInline(expression));
1730+
} else if parser
1731+
.source
1732+
.starts_with("```")
1733+
{
1734+
// Multiline blocks are not allowed in descriptive text
1735+
return Err(ParsingError::InvalidMultiline(parser.offset));
17031736
} else if c == '<' {
17041737
let invocation = parser.read_invocation()?;
17051738
parser.trim_whitespace();
@@ -1717,9 +1750,16 @@ impl<'i> Parser<'i> {
17171750
} else {
17181751
let text =
17191752
parser.take_until(&['{', '<', '~', '\n'], |inner| {
1720-
Ok(inner
1753+
let content = inner
17211754
.source
1722-
.trim_ascii())
1755+
.trim_ascii();
1756+
// Check for invalid multiline patterns in text
1757+
if content.contains("```") {
1758+
return Err(ParsingError::InvalidMultiline(
1759+
inner.offset,
1760+
));
1761+
}
1762+
Ok(content)
17231763
})?;
17241764
if text.is_empty() {
17251765
continue;
@@ -1841,7 +1881,13 @@ impl<'i> Parser<'i> {
18411881

18421882
if content.starts_with("```") {
18431883
let (lang, lines) = outer
1844-
.take_block_delimited("```", |inner| inner.parse_multiline_content())?;
1884+
.take_block_delimited("```", |inner| inner.parse_multiline_content())
1885+
.map_err(|err| match err {
1886+
ParsingError::Expected(offset, "the corresponding end delimiter") => {
1887+
ParsingError::InvalidMultiline(offset)
1888+
}
1889+
_ => err,
1890+
})?;
18451891
params.push(Expression::Multiline(lang, lines));
18461892
} else if content.starts_with("\"") {
18471893
let raw = outer
@@ -1962,6 +2008,8 @@ impl<'i> Parser<'i> {
19622008
} else if is_code_block(content) {
19632009
let block = self.read_code_scope()?;
19642010
scopes.push(block);
2011+
} else if is_invalid_response_pattern(content) {
2012+
return Err(ParsingError::InvalidResponse(self.offset));
19652013
} else if is_enum_response(content) {
19662014
let responses = self.read_responses()?;
19672015
scopes.push(Scope::ResponseBlock { responses });
@@ -2242,6 +2290,12 @@ fn is_step(content: &str) -> bool {
22422290
is_step_dependent(content) || is_step_parallel(content)
22432291
}
22442292

2293+
/// Detect patterns that look like steps but are invalid at the top-level
2294+
fn is_invalid_step_pattern(content: &str) -> bool {
2295+
let re = regex!(r"^\s*([a-zA-Z]|[ivxIVX]+)\.\s+");
2296+
re.is_match(content)
2297+
}
2298+
22452299
fn is_section(content: &str) -> bool {
22462300
let re = regex!(r"^\s*([IVX]+)\.\s+");
22472301
re.is_match(content)
@@ -2281,6 +2335,12 @@ fn is_enum_response(content: &str) -> bool {
22812335
re.is_match(content)
22822336
}
22832337

2338+
/// Detect response patterns with double quotes
2339+
fn is_invalid_response_pattern(content: &str) -> bool {
2340+
let re = regex!(r#"^\s*".+?"(\s*\|\s*".+?")*"#);
2341+
re.is_match(content)
2342+
}
2343+
22842344
fn is_numeric(content: &str) -> bool {
22852345
let integral = regex!(r"^\s*-?[0-9]+(\.[0-9]+)?\s*$");
22862346
let scientific = regex!(r"^\s*-?[0-9]+(\.[0-9]+)?(\s*[a-zA-Z°μ]|\s*±|\s*×|\s*x\s*10)");

0 commit comments

Comments
 (0)