From b4d6662e31a57f0a450366af40292b2cf10c861c Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 14:10:24 -0500 Subject: [PATCH 01/33] begin work on repeating paragraphs --- .github/workflows/tests.yaml | 13 +- AGENTS.md | 24 + examples/cli/input.md | 2 +- examples/cli/schema.md | 2 +- src/mdschema/validator/errors.rs | 28 +- src/mdschema/validator/matcher/matcher.rs | 7 +- .../helpers/check_repeating_matchers.rs | 2 +- .../helpers/compare_text_contents.rs | 64 +- .../count_non_literal_matchers_in_children.rs | 156 +++ .../helpers/expected_input_nodes.rs | 2 +- .../validator/node_walker/helpers/mod.rs | 13 +- .../helpers/node_children_lengths.rs | 2 +- src/mdschema/validator/node_walker/utils.rs | 2 +- .../validator/node_walker/validators/code.rs | 26 +- .../node_walker/validators/containers.rs | 481 ++++++++ .../node_walker/validators/headings.rs | 27 +- .../validator/node_walker/validators/links.rs | 227 ++-- .../validator/node_walker/validators/lists.rs | 839 +++++++------ .../node_walker/validators/matchers.rs | 1090 ++++++++++------- .../validator/node_walker/validators/mod.rs | 19 +- .../validator/node_walker/validators/nodes.rs | 38 +- .../node_walker/validators/quotes.rs | 10 +- .../node_walker/validators/tables.rs | 14 +- .../node_walker/validators/textual.rs | 14 +- .../validators/textual_container.rs | 383 ------ src/mdschema/validator/ts_utils.rs | 8 +- tests/misc.rs | 28 +- 27 files changed, 2035 insertions(+), 1486 deletions(-) create mode 100644 AGENTS.md create mode 100644 src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs create mode 100644 src/mdschema/validator/node_walker/validators/containers.rs delete mode 100644 src/mdschema/validator/node_walker/validators/textual_container.rs diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 3a32f2b..f2d50fd 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,35 +1,28 @@ name: Tests - on: push: - 
jobs: test: runs-on: ubuntu-latest - permissions: contents: read actions: write - steps: - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Install Nix uses: cachix/install-nix-action@v31 with: extra_nix_config: | experimental-features = nix-command flakes - - name: Restore Nix store id: nix-cache uses: nix-community/cache-nix-action/restore@v7 with: primary-key: nix-${{ runner.os }}-${{ hashFiles('**/flake.lock') }} restore-prefixes-first-match: nix-${{ runner.os }}- - - name: Restore Cargo cache uses: actions/cache/restore@v4 id: cargo-cache @@ -43,13 +36,10 @@ jobs: key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }} restore-keys: | cargo-${{ runner.os }}- - - name: Build project run: nix develop --command cargo build - - name: Run tests run: nix develop --command cargo test - - name: Save Cargo cache if: always() uses: actions/cache/save@v4 @@ -61,7 +51,6 @@ jobs: ~/.cargo/git/db/ target/ key: ${{ steps.cargo-cache.outputs.cache-primary-key }} - - name: Save Nix store uses: nix-community/cache-nix-action/save@v7 if: always() @@ -72,4 +61,4 @@ jobs: purge-prefixes: nix-${{ runner.os }}- purge-created: 0 purge-last-accessed: P7D - purge-primary-key: never \ No newline at end of file + purge-primary-key: never diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..ebbc638 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,24 @@ +# Agent Guidelines + +## ts_types imports +- Always import `ts_types` via wildcard (`use crate::mdschema::validator::ts_types::*;`) so we do not list individual members. + +## Test imports +- Prefer `super::...` imports inside `#[cfg(test)]` modules (e.g., `super::test_utils::ValidatorTester` or `super::TextualVsTextualValidator`) so the tests stay concise. +- Keep using wildcard `ts_types::*` in tests as well. + +## Documentation +- When a doc block lists both `schema_str` and `input_str`, use the exact wording: + - `schema_str`: The full input document (so far). 
+ - `input_str`: The full schema document. +- For any doc line that mentions `got_eof`, use `/// * `got_eof`: Whether we have received the full input document.` verbatim. + +## Walker usage +- Never add aliases such as `let schema_str = walker.schema_str()` or `let input_str = walker.input_str()`; call the walker methods directly. + +## Contributing-from-CONTRIBUTING.md +- When we talk about a data structure that stores references to schema and input, keep the schema entry first. +- Prefer `get_node_text` from `ts_utils` over calling `utf8_text` directly. +- In tests, keep assertion order consistent: position assertions first, followed by errors, then values. +- Avoid `ValidationResult::destruct`; use accessors like `result.errors()`, `result.value()`, or `result.farthest_reached_pos()`. +- When debugging tests, call `test_logging!();` (from `utils.rs`) at the top of the suite to hydrate logs and trace output. diff --git a/examples/cli/input.md b/examples/cli/input.md index 792d600..b33c560 100644 --- a/examples/cli/input.md +++ b/examples/cli/input.md @@ -1 +1 @@ -# +test test diff --git a/examples/cli/schema.md b/examples/cli/schema.md index 792d600..ec476a0 100644 --- a/examples/cli/schema.md +++ b/examples/cli/schema.md @@ -1 +1 @@ -# +`test:/test/`{,} test diff --git a/src/mdschema/validator/errors.rs b/src/mdschema/validator/errors.rs index b6a8c7c..7983052 100644 --- a/src/mdschema/validator/errors.rs +++ b/src/mdschema/validator/errors.rs @@ -196,11 +196,6 @@ pub enum SchemaError { /// A repeating matcher in a textual container RepeatingMatcherInTextContainer { schema_index: usize }, - /// List node uses a non-repeating matcher. - /// - /// List nodes must use matchers with repetition syntax like `{1,}`. - BadListMatcher { schema_index: usize }, - /// Matcher has invalid extras syntax. /// /// For example, `test:/1/`!{1,2} is invalid. @@ -277,9 +272,7 @@ impl fmt::Display for SchemaError { SchemaError::RepeatingMatcherInTextContainer { .. 
} => { write!(f, "Repeating matcher cannot be used in text container") } - SchemaError::BadListMatcher { .. } => { - write!(f, "List node requires repeating matcher syntax") - } + SchemaError::InvalidMatcherExtras { error, .. } => { write!(f, "Invalid matcher extras: {}", error) } @@ -770,25 +763,6 @@ You can mark a list node as repeating by adding a '{,} dir .with_help("Text containers like paragraphs and headings cannot contain repeating matchers. Use repetition syntax only with list items.") .finish() } - SchemaError::BadListMatcher { schema_index } => { - let schema_node = find_node_by_index(tree.root_node(), *schema_index); - let schema_content = - node_content_by_index(tree.root_node(), *schema_index, source_content)?; - let schema_range = schema_node.start_byte()..schema_node.end_byte(); - - Report::build(ReportKind::Error, (filename, schema_range.clone())) - .with_message("Bad list matcher") - .with_label( - Label::new((filename, schema_range)) - .with_message(format!( - "No matchers found in children of list node: '{}'", - schema_content - )) - .with_color(Color::Red), - ) - .with_help("List nodes require repeating matcher syntax like `label:/pattern/`{1,}") - .finish() - } SchemaError::UnclosedMatcher { schema_index } => { let schema_node = find_node_by_index(tree.root_node(), *schema_index); let schema_range = schema_node.start_byte()..schema_node.end_byte(); diff --git a/src/mdschema/validator/matcher/matcher.rs b/src/mdschema/validator/matcher/matcher.rs index 8febb61..158b1cb 100644 --- a/src/mdschema/validator/matcher/matcher.rs +++ b/src/mdschema/validator/matcher/matcher.rs @@ -8,7 +8,7 @@ use tree_sitter::TreeCursor; use crate::mdschema::validator::{ matcher::matcher_extras::{MatcherExtrasError, partition_at_special_chars}, - ts_types::is_text_node, + ts_types::*, ts_utils::{get_next_node, get_node_and_next_node, get_node_text}, }; @@ -210,6 +210,11 @@ impl Matcher { schema_cursor: &TreeCursor, schema_str: &str, ) -> Result { + #[cfg(feature = 
"invariant_violations")] + if !is_inline_code_node(&schema_cursor.node()) { + invariant_violation!("expected inline code node for extracting a matcher"); + } + let pattern_str = get_node_text(&schema_cursor.node(), schema_str); let next_node = get_next_node(schema_cursor); let extras_str = next_node diff --git a/src/mdschema/validator/node_walker/helpers/check_repeating_matchers.rs b/src/mdschema/validator/node_walker/helpers/check_repeating_matchers.rs index 5ff46c5..0ceb1ea 100644 --- a/src/mdschema/validator/node_walker/helpers/check_repeating_matchers.rs +++ b/src/mdschema/validator/node_walker/helpers/check_repeating_matchers.rs @@ -2,7 +2,7 @@ use tree_sitter::TreeCursor; use crate::mdschema::validator::{ matcher::matcher::{Matcher, MatcherError}, - ts_types::is_inline_code_node, + ts_types::*, }; pub fn check_repeating_matchers(schema_cursor: &TreeCursor, schema_str: &str) -> Option { diff --git a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs b/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs index 05feb67..8d9d4ea 100644 --- a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs +++ b/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs @@ -1,7 +1,9 @@ use serde_json::json; use tree_sitter::TreeCursor; -use crate::mdschema::validator::errors::{NodeContentMismatchKind, SchemaError, SchemaViolationError, ValidationError}; +use crate::mdschema::validator::errors::{ + NodeContentMismatchKind, SchemaError, SchemaViolationError, ValidationError, +}; use crate::mdschema::validator::matcher::matcher::MatcherError; use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::helpers::curly_matchers::extract_matcher_from_curly_delineated_text; @@ -11,8 +13,8 @@ use crate::mdschema::validator::ts_utils::get_node_text; /// Handles both literal text and curly-delimited matchers. 
/// /// # Arguments -/// - `schema_str`: The full schema markdown string -/// - `input_str`: The full input markdown string +/// * `schema_str`: The full input document (so far). +/// * `input_str`: The full schema document. /// - `schema_cursor`: Cursor at schema text node /// - `input_cursor`: Cursor at input text node /// - `is_partial_match`: Whether we're doing a partial match (not at EOF) @@ -187,7 +189,14 @@ mod tests { schema_cursor.goto_first_child(); input_cursor.goto_first_child(); - let result = compare_text_contents(schema_str, input_str, &schema_cursor, &input_cursor, false, false); + let result = compare_text_contents( + schema_str, + input_str, + &schema_cursor, + &input_cursor, + false, + false, + ); // Result depends on whether we found matching nodes, so just verify it doesn't panic let _ = result; } @@ -209,7 +218,14 @@ mod tests { input_cursor.goto_first_child(); // With strip_extras=true, should handle the "!" prefix - let result = compare_text_contents(schema_str, input_str, &schema_cursor, &input_cursor, false, true); + let result = compare_text_contents( + schema_str, + input_str, + &schema_cursor, + &input_cursor, + false, + true, + ); // Just verify no panic let _ = result; } @@ -231,7 +247,14 @@ mod tests { input_cursor.goto_first_child(); // With is_partial_match=true, partial content should be acceptable - let result = compare_text_contents(schema_str, input_str, &schema_cursor, &input_cursor, true, false); + let result = compare_text_contents( + schema_str, + input_str, + &schema_cursor, + &input_cursor, + true, + false, + ); let _ = result; } @@ -253,10 +276,21 @@ mod tests { schema_cursor.goto_first_child(); input_cursor.goto_first_child(); - let result = compare_text_contents(schema_str, input_str, &schema_cursor, &input_cursor, false, false); + let result = compare_text_contents( + schema_str, + input_str, + &schema_cursor, + &input_cursor, + false, + false, + ); // Should match and capture - assert!(!result.has_errors(), 
"Expected no errors but got: {:?}", result.errors()); + assert!( + !result.has_errors(), + "Expected no errors but got: {:?}", + result.errors() + ); assert_eq!(result.value(), &json!({"test": "testing"})); } @@ -276,10 +310,20 @@ mod tests { schema_cursor.goto_first_child(); input_cursor.goto_first_child(); - let result = compare_text_contents(schema_str, input_str, &schema_cursor, &input_cursor, false, false); + let result = compare_text_contents( + schema_str, + input_str, + &schema_cursor, + &input_cursor, + false, + false, + ); // Should have an error - assert!(result.has_errors(), "Expected error for non-matching matcher"); + assert!( + result.has_errors(), + "Expected error for non-matching matcher" + ); assert_eq!(result.errors().len(), 1); } } diff --git a/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs b/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs new file mode 100644 index 0000000..274d549 --- /dev/null +++ b/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs @@ -0,0 +1,156 @@ +use tree_sitter::TreeCursor; + +use crate::mdschema::validator::{ + errors::{SchemaError, ValidationError}, + matcher::{ + matcher::{Matcher, MatcherError}, + matcher_extras::get_all_extras, + }, + ts_types::*, + ts_utils::{get_next_node, get_node_text}, +}; + +/// Check whether a paragraph is a repeated paragraph matcher. +/// +/// A paragraph is a repeated paragraph matcher if it has a single child, which +/// is a a repeated matcher. +/// +/// For example, +/// +/// ``` +/// `test:/test/`{,} +/// ``` +/// +/// Contains a document with one child, which is a repeated paragraph matcher, +/// whereas +/// +/// ``` +/// `test:/test/` test +/// ``` +/// +/// Contains a document with one child, which is just a normal paragraph with a +/// matcher in it. 
+ +/// Count the number of matchers, starting at some cursor pointing to a textual +/// container, and iterating through all of its children. +/// +/// Returns the number of matchers, or a `ValidationError` that is probably a +/// `MatcherError` due to failing to construct a matcher given a code node that +/// is not marked as literal. +pub fn count_non_literal_matchers_in_children( + schema_cursor: &TreeCursor, + schema_str: &str, +) -> Result { + let mut count = 0; + let mut cursor = schema_cursor.clone(); + + cursor.goto_first_child(); + + loop { + if !is_inline_code_node(&cursor.node()) { + if !cursor.goto_next_sibling() { + break; + } else { + continue; + } + } + + // If the following node is a text node, then it may have extras, so grab them. + let extras_str = match get_next_node(&cursor) + .filter(|n| is_text_node(n)) + .map(|next_node| { + let next_node_str = get_node_text(&next_node, schema_str); + get_all_extras(next_node_str) + }) { + Some(Ok(extras)) => Some(extras), + Some(Err(error)) => { + return Err(ValidationError::SchemaError(SchemaError::MatcherError { + error: error.into(), + schema_index: schema_cursor.descendant_index(), + })); + } + None => None, + }; + + let pattern_str = get_node_text(&cursor.node(), schema_str); + + match Matcher::try_from_pattern_and_suffix_str(pattern_str, extras_str) { + Ok(_) => count += 1, + Err(MatcherError::WasLiteralCode) => { + // Don't count it, but this is an OK error + } + Err(err) => { + return Err(ValidationError::SchemaError(SchemaError::MatcherError { + error: err, + schema_index: cursor.descendant_index(), + })); + } + } + + if !cursor.goto_next_sibling() { + break; + } + } + + Ok(count) +} + +#[cfg(test)] +mod tests { + use crate::mdschema::validator::{ + errors::{SchemaError, ValidationError}, + matcher::matcher::MatcherError, + node_walker::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children, + ts_utils::parse_markdown, + }; + + #[test] + fn 
test_count_non_literal_matchers_in_children_invalid_matcher() { + let schema_str = "test `_*test*_`"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + schema_cursor.goto_first_child(); + + match count_non_literal_matchers_in_children(&schema_cursor, schema_str).unwrap_err() { + ValidationError::SchemaError(SchemaError::MatcherError { + error, + schema_index, + }) => { + assert_eq!(schema_index, 3); // the index of the code_span + match error { + MatcherError::MatcherInteriorRegexInvalid(_) => {} + _ => panic!("Expected MatcherInteriorRegexInvalid error"), + } + } + _ => panic!("Expected InvalidMatcher error"), + } + } + + #[test] + fn test_count_non_literal_matchers_in_children_only_literal_matcher() { + let schema_str = "test `_*test*_`! `test:/test/`"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + schema_cursor.goto_first_child(); + + assert_eq!( + count_non_literal_matchers_in_children(&schema_cursor, schema_str).unwrap(), + 1 // one is literal + ); + } + + #[test] + fn test_count_non_literal_matchers_in_children_no_matchers() { + let schema_str = "test *foo* _bar_"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + + assert_eq!( + count_non_literal_matchers_in_children(&schema_cursor, schema_str).unwrap(), + 0 + ); + } +} diff --git a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs b/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs index 3c970b3..7d22473 100644 --- a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs +++ b/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs @@ -7,7 +7,7 @@ use crate::mdschema::validator::{ matcher::{Matcher, MatcherError}, matcher_extras::{get_after_extras, get_all_extras}, }, - 
ts_types::{is_inline_code_node, is_text_node}, + ts_types::*, ts_utils::{get_next_node, get_node_text}, }; diff --git a/src/mdschema/validator/node_walker/helpers/mod.rs b/src/mdschema/validator/node_walker/helpers/mod.rs index f630949..c8438ec 100644 --- a/src/mdschema/validator/node_walker/helpers/mod.rs +++ b/src/mdschema/validator/node_walker/helpers/mod.rs @@ -1,6 +1,7 @@ -pub mod check_repeating_matchers; -pub mod curly_matchers; -pub mod expected_input_nodes; -pub mod node_children_lengths; -pub mod compare_node_kinds; -pub mod compare_text_contents; +pub(crate) mod check_repeating_matchers; +pub(crate) mod compare_node_kinds; +pub(crate) mod compare_text_contents; +pub(crate) mod count_non_literal_matchers_in_children; +pub(crate) mod curly_matchers; +pub(crate) mod expected_input_nodes; +pub(crate) mod node_children_lengths; diff --git a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs index 6c5096c..178e93c 100644 --- a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs +++ b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs @@ -11,7 +11,7 @@ use crate::mdschema::validator::errors::{ChildrenCount, SchemaViolationError, Va /// # Arguments /// - `schema_cursor`: Cursor at schema node /// - `input_cursor`: Cursor at input node -/// - `got_eof`: Whether we've reached end of file +/// * `got_eof`: Whether we have received the full input document. 
pub fn compare_node_children_lengths( schema_cursor: &TreeCursor, input_cursor: &TreeCursor, diff --git a/src/mdschema/validator/node_walker/utils.rs b/src/mdschema/validator/node_walker/utils.rs index 7f7ff91..7f356f8 100644 --- a/src/mdschema/validator/node_walker/utils.rs +++ b/src/mdschema/validator/node_walker/utils.rs @@ -4,9 +4,9 @@ use tree_sitter::TreeCursor; use serde_json::Value; use crate::mdschema::validator::ts_utils::walk_to_root; -use mdvalidate_utils::PrettyPrint; #[cfg(test)] use crate::mdschema::validator::{errors::ValidationError, validator::Validator}; +use mdvalidate_utils::PrettyPrint; #[cfg(test)] pub fn validate_str(schema: &str, input: &str) -> (Value, Vec, Validator) { diff --git a/src/mdschema/validator/node_walker/validators/code.rs b/src/mdschema/validator/node_walker/validators/code.rs index cdf4ce4..12babe2 100644 --- a/src/mdschema/validator/node_walker/validators/code.rs +++ b/src/mdschema/validator/node_walker/validators/code.rs @@ -1,3 +1,8 @@ +//! Code block validator for node-walker comparisons. +//! +//! Types: +//! - `CodeVsCodeValidator`: validates code block language and content, with +//! optional matcher-based captures in schema text. 
use serde_json::json; use crate::invariant_violation; @@ -60,8 +65,6 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { let schema_cursor = walker.schema_cursor().clone(); let input_cursor = walker.input_cursor().clone(); - let schema_str = walker.schema_str(); - let input_str = walker.input_str(); #[cfg(feature = "invariant_violations")] if input_cursor.node().kind() != "fenced_code_block" @@ -75,14 +78,14 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { ); } - let input_extracted = match extract_codeblock_contents(&input_cursor, input_str) { + let input_extracted = match extract_codeblock_contents(&input_cursor, walker.input_str()) { Ok(value) => value, Err(error) => { result.add_error(error); return result; } }; - let schema_extracted = match extract_codeblock_contents(&schema_cursor, schema_str) { + let schema_extracted = match extract_codeblock_contents(&schema_cursor, walker.schema_str()) { Ok(value) => value, Err(error) => { result.add_error(error); @@ -204,8 +207,8 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { mod tests { use serde_json::json; - use crate::mdschema::validator::node_walker::validators::test_utils::ValidatorTester; - use crate::mdschema::validator::ts_types::both_are_codeblocks; + use super::super::test_utils::ValidatorTester; + use crate::mdschema::validator::ts_types::*; use super::*; @@ -221,7 +224,11 @@ mod tests { .peek_nodes(|(s, i)| assert!(both_are_codeblocks(s, i))) .validate_complete(); - assert!(result.errors().is_empty(), "Expected no errors, got {:?}", result.errors()); + assert!( + result.errors().is_empty(), + "Expected no errors, got {:?}", + result.errors() + ); assert_eq!(result.value(), &json!({})); // negative case: change input so it is not the same as the schema @@ -269,6 +276,9 @@ fn main() {} .validate_complete(); assert!(result.errors().is_empty()); - assert_eq!(result.value(), &json!({ "lang": "rust", "code": "fn main() 
{}" })) + assert_eq!( + result.value(), + &json!({ "lang": "rust", "code": "fn main() {}" }) + ) } } diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs new file mode 100644 index 0000000..62e5a9a --- /dev/null +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -0,0 +1,481 @@ +//! Textual container validator for node-walker comparisons. +//! +//! Types: +//! - `TextualContainerVsTextualContainerValidator`: walks inline children in +//! paragraphs/emphasis and validates them with matcher support and link-aware +//! handling. +use log::trace; +use tree_sitter::TreeCursor; + +use crate::mdschema::validator::node_walker::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children; +use crate::mdschema::validator::validator_walker::ValidatorWalker; +use crate::mdschema::validator::{ + errors::*, + matcher::matcher::Matcher, + node_walker::{ + ValidationResult, + helpers::expected_input_nodes::expected_input_nodes, + validators::{ + Validator, ValidatorImpl, links::LinkVsLinkValidator, + textual::TextualVsTextualValidator, + }, + }, + ts_types::*, + ts_utils::count_siblings, +}; +use crate::{compare_node_kinds_check, invariant_violation}; + +/// Validate a textual region of input against a textual region of schema. +/// +/// Takes two cursors pointing at text containers in the schema and input, and +/// validates them. The input text container may have a single matcher, and +/// potentially many other types of nodes. For example: +/// +/// Schema: +/// ```md +/// **Test** _*test*_ `test///`! `match:/test/` *foo*. +/// ``` +/// +/// Input: +/// ```md +/// **Test** _*test*_ `test///`! test *foo*. +/// +/// # Algorithm +/// +/// This works by: +/// +/// 1. Count the number of top level matchers in the schema. Find the first +/// valid one. Then keep going, but if there are more than 1, error. +/// 2. 
Count the number of nodes for both the input and schema using a special +/// utility that takes into account literal matchers. +/// 3. Walk the input and schema cursors at the same rate, and walk down and +/// recurse, which takes us to our base case of directly validating the contents +/// and kind of the node. If the node we are at is a code node, look at it and +/// the next node. If the two nodes correspond to a literal matcher: +/// - Match the inside of the matcher against the corresponding code node in the input. +/// - Then if there is additional text in the subsequent text node after the code node, +/// check that there is a text node in the input, maybe error, and if there is, +/// validate that the contents of the rest of it is the same. +/// - Then move to the next node pair, hopping two nodes at once for the schema node. +pub(super) struct TextualContainerVsTextualContainerValidator; + +impl ValidatorImpl for TextualContainerVsTextualContainerValidator { + fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + let mut result = + ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); + + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); + + #[cfg(feature = "invariant_violations")] + if !both_are_textual_containers(&schema_cursor.node(), &input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "expected textual container nodes" + ); + } + + compare_node_kinds_check!( + schema_cursor, + input_cursor, + walker.schema_str(), + walker.input_str(), + result + ); + + if is_repeated_matcher_paragraph(&schema_cursor, walker.schema_str()) { + return ParagraphVsRepeatedMatcherParagraphValidator::validate(walker, got_eof); + } + + match count_non_literal_matchers_in_children(&schema_cursor, walker.schema_str()) { + Ok(non_repeating_matchers_count) if non_repeating_matchers_count > 1 && got_eof => { + 
result.add_error(ValidationError::SchemaError( + SchemaError::MultipleMatchersInNodeChildren { + schema_index: schema_cursor.descendant_index(), + received: non_repeating_matchers_count, + }, + )) + } + Ok(_) => { + // Exactly one non repeating matcher is OK! + } + Err(err) => { + result.add_error(err); + + return result; + } + } + + let (expected_input_node_count, actual_input_node_count) = { + let mut schema_cursor = schema_cursor.clone(); + schema_cursor.goto_first_child(); + + let mut input_cursor = input_cursor.clone(); + input_cursor.goto_first_child(); + + let expected_input_node_count = + match expected_input_nodes(&schema_cursor, walker.schema_str()) { + Ok(expected_input_node_count) => expected_input_node_count, + Err(error) => { + result.add_error(error); + return result; + } + }; + + let actual_input_node_count = count_siblings(&input_cursor) + 1; // including the node we are currently at + + (expected_input_node_count, actual_input_node_count) + }; + + if (actual_input_node_count != expected_input_node_count) && got_eof { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::ChildrenLengthMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: ChildrenCount::from_specific(expected_input_node_count), + actual: actual_input_node_count, + }, + )); + } + + // Go from the container to the first child in the container, and then + // iterate over the siblings at the same rate. 
+ match ( + input_cursor.goto_first_child(), + schema_cursor.goto_first_child(), + ) { + (true, true) => {} // nothing to do + (false, false) => { + return result; + } + (true, false) => todo!(), + (false, true) => todo!(), + } + + loop { + let pair_result = if both_are_link_nodes(&schema_cursor.node(), &input_cursor.node()) + || both_are_image_nodes(&schema_cursor.node(), &input_cursor.node()) + { + LinkVsLinkValidator::validate( + &walker.with_cursors(&schema_cursor, &input_cursor), + got_eof, + ) + } else { + let new_result = TextualVsTextualValidator::validate( + &walker.with_cursors(&schema_cursor, &input_cursor), + got_eof, + ); + new_result.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); + new_result + }; + + result.join_other_result(&pair_result); + + if !schema_cursor.goto_next_sibling() || !input_cursor.goto_next_sibling() { + break; + } + } + + result + } +} + +/// We special case paragraphs that are just a single code node that is a +/// repeated matcher. This function attempts to match what we call a repeated +/// matcher paragraph. 
+/// +/// If we see a paragraph that looks like: +/// +/// ```md +/// # Hi there +/// +/// `paragraphs:/.*/`{2,2} +/// ``` +/// +/// (Where the `` `paragraphs:/.*/`{2,2} `` is the paragraph) +/// +/// Then we expect an input that has that many paragraphs, and we accumulate them into an array: +/// +/// ```md +/// # Hi there +/// +/// This is the first paragraph +/// +/// This is the second paragraph +/// ``` +/// +/// And the output is +/// +/// ```json +/// { +/// "paragraphs": [ +/// "This is the first paragraph", +/// "This is the second paragraph" +/// ] +/// } +/// ``` +pub(super) struct ParagraphVsRepeatedMatcherParagraphValidator; + +impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { + fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + let result = ValidationResult::from_cursors(walker.schema_cursor(), &walker.input_cursor()); + + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); + + #[cfg(feature = "invariant_violations")] + if !both_are_paragraphs( + &walker.schema_cursor().node(), + &walker.input_cursor().node(), + ) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "repeated containers are only possible for paragraphs" + ); + } + + // Go from the container to the first child in the container, and then + // iterate over the siblings at the same rate. 
+ match ( + input_cursor.goto_first_child(), + schema_cursor.goto_first_child(), + ) { + (true, true) => { + // Great, keep going + } + (false, false) => { + // nothing to do + return result; + } + (true, false) => todo!(), + (false, true) => todo!(), + } + + match Matcher::try_from_schema_cursor(&schema_cursor, walker.schema_str()) { + Ok(matcher) if matcher.is_repeated() => { + todo!() + } + _ => { + #[cfg(feature = "invariant_violations")] + invariant_violation!( + &schema_cursor, + &input_cursor, + "we should be at a repeated matcher" + ) + } + } + } +} + +/// Check whether a paragraph is a repeated paragraph matcher. +/// +/// A paragraph is a repeated paragraph matcher if it has a single child, which +/// is a repeated matcher. +/// +/// For example, +/// +/// ``` +/// `test:/test/`{,} +/// ``` +/// +/// Contains a document with one child, which is a repeated paragraph matcher, +/// whereas +/// +/// ``` +/// `test:/test/` test +/// ``` +/// +/// Contains a document with one child, which is just a normal paragraph with a +/// matcher in it. +/// +/// # Arguments +/// +/// * `schema_cursor`: The cursor pointing to a paragraph that might be a repeated matcher paragraph. +/// * `schema_str`: The full input document (so far). +fn is_repeated_matcher_paragraph(schema_cursor: &TreeCursor, schema_str: &str) -> bool { + // We must be at a paragraph node + if !is_paragraph_node(&schema_cursor.node()) { + trace!("is_repeated_matcher_paragraph: not a paragraph node, returning false"); + return false; + } + + // All repeating matchers have a code span followed by text. This is a nonstarter. 
+ if schema_cursor.node().child_count() != 2 { + trace!("is_repeated_matcher_paragraph: child count is not 2, returning false"); + return false; + } + + let mut schema_cursor = schema_cursor.clone(); + schema_cursor.goto_first_child(); // note we know there is one because we checked above + + match Matcher::try_from_schema_cursor(&schema_cursor, schema_str) { + Ok(matcher) if matcher.is_repeated() => true, + Ok(_) => false, + Err(_) => false, + } +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use super::{TextualContainerVsTextualContainerValidator, is_repeated_matcher_paragraph}; + use crate::mdschema::validator::{ + errors::{SchemaError, ValidationError}, + node_pos_pair::NodePosPair, + node_walker::validators::test_utils::ValidatorTester, + ts_types::*, + ts_utils::parse_markdown, + }; + + #[test] + fn test_is_repeated_matcher_paragraph_simple_non_paragraph() { + let schema_str = "`test`"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + schema_cursor.goto_first_child(); + assert!(is_inline_code_node(&schema_cursor.node())); + assert!(!is_repeated_matcher_paragraph(&schema_cursor, schema_str)); + } + + #[test] + fn test_is_repeated_matcher_paragraph_simple_non_repeating() { + let schema_str = "this is just a normal paragraph"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + assert!(is_paragraph_node(&schema_cursor.node())); + assert!(!is_repeated_matcher_paragraph(&schema_cursor, schema_str)); + } + + #[test] + fn test_is_repeated_matcher_paragraph_simple_repeating_matcher() { + let schema_str = "`test:/test/`{,}"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + assert!(is_paragraph_node(&schema_cursor.node())); + assert!(is_repeated_matcher_paragraph(&schema_cursor, 
schema_str)); + } + + #[test] + fn test_is_repeated_matcher_paragraph_matcher_non_repeating() { + let schema_str = "`test:/test/` test"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + assert!(is_paragraph_node(&schema_cursor.node())); + assert!(!is_repeated_matcher_paragraph(&schema_cursor, schema_str)); + } + + #[test] + fn test_is_repeated_matcher_paragraph_matcher_invalid_matcher() { + let schema_str = "`fjeiaofjioweajf` test"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + assert!(is_paragraph_node(&schema_cursor.node())); + assert!(!is_repeated_matcher_paragraph(&schema_cursor, schema_str)); + } + + #[test] + fn test_is_repeated_matcher_paragraph_matcher_valid_literal_matcher() { + let schema_str = "`this is invalid`! test"; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); + assert!(is_paragraph_node(&schema_cursor.node())); + assert!(!is_repeated_matcher_paragraph(&schema_cursor, schema_str)); + } + + #[test] + fn test_validate_textual_container_vs_textual_container_with_content_and_link() { + let schema_str = "# Test Wolf [hi](https://example.com)"; + let input_str = "# Test Wolf [hi](https://foobar.com)"; + + let result = ValidatorTester::::from_strs( + schema_str, input_str, + ) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) + .validate_complete(); + + assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(9, 9)); + assert!(!result.errors().is_empty()); + assert_eq!(result.value(), &json!({})); + } + + #[test] + fn test_validate_textual_container_vs_textual_container_header_content() { + let schema_str = "# Test Wolf"; + 
let input_str = "# Test Wolf"; + + let result = ValidatorTester::::from_strs( + schema_str, input_str, + ) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) + .validate_complete(); + + assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(4, 4)); + assert_eq!(result.errors(), &vec![]); + assert_eq!(result.value(), &json!({})); + } + + #[test] + fn test_validate_textual_container_vs_textual_container_header_content_and_matcher() { + let schema_str = "# Test `name:/[a-zA-Z]+/`"; + let input_str = "# Test Wolf"; + + let result = ValidatorTester::::from_strs( + schema_str, input_str, + ) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) + .validate_complete(); + + assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(6, 4)); + assert_eq!(result.errors(), &vec![]); + assert_eq!(result.value(), &json!({"name": "Wolf"})); + } + + #[test] + fn test_validate_textual_container_vs_textual_container_link_then_bad_node() { + let schema_str = "# Heading [test]({a:/a/}) `b:/b/`"; + let input_str = "# Heading [test](a) b"; + + let result = ValidatorTester::::from_strs( + schema_str, input_str, + ) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_textual_containers(s, i))) + .validate_complete(); + + let errors = result.errors().to_vec(); + let value = result.value().clone(); + + assert_eq!( + *result.farthest_reached_pos(), + NodePosPair::from_pos(12, 10) + ); + assert_eq!(errors, vec![]); + assert_eq!(value, json!({"a": "a", "b": "b"})); + } +} diff --git a/src/mdschema/validator/node_walker/validators/headings.rs 
b/src/mdschema/validator/node_walker/validators/headings.rs index 4555fe4..d55312f 100644 --- a/src/mdschema/validator/node_walker/validators/headings.rs +++ b/src/mdschema/validator/node_walker/validators/headings.rs @@ -1,3 +1,8 @@ +//! Heading validator for node-walker comparisons. +//! +//! Types: +//! - `HeadingVsHeadingValidator`: confirms heading kinds align and delegates +//! content checks to textual container validation. use log::trace; use tree_sitter::TreeCursor; @@ -5,12 +10,9 @@ use crate::invariant_violation; use crate::mdschema::validator::errors::ValidationError; use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::helpers::compare_node_kinds::compare_node_kinds; -use crate::mdschema::validator::node_walker::validators::textual_container::TextualContainerVsTextualContainerValidator; +use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; -use crate::mdschema::validator::ts_types::{ - both_are_headings, is_heading_content_node, is_heading_node, is_marker_node, - is_textual_container_node, -}; +use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::waiting_at_end; use crate::mdschema::validator::validator_walker::ValidatorWalker; @@ -25,9 +27,6 @@ impl ValidatorImpl for HeadingVsHeadingValidator { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); - let schema_str = walker.schema_str(); - let input_str = walker.input_str(); - let mut schema_cursor = walker.schema_cursor().clone(); let mut input_cursor = walker.input_cursor().clone(); @@ -43,9 +42,12 @@ impl ValidatorImpl for HeadingVsHeadingValidator { } // This also checks the *type* of heading that they are at - if let Some(error) = - compare_node_kinds(&schema_cursor, &input_cursor, schema_str, input_str) - { + if let 
Some(error) = compare_node_kinds( + &schema_cursor, + &input_cursor, + walker.schema_str(), + walker.input_str(), + ) { if waiting_at_end(got_eof, walker.input_str(), &input_cursor) && both_are_headings(&schema_cursor.node(), &input_cursor.node()) { @@ -137,12 +139,11 @@ fn ensure_at_heading_content(cursor: &mut TreeCursor) -> Result ValidationResult { - validate_link_vs_link_impl(walker, got_eof) - } -} - -fn validate_link_vs_link_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); - - let schema_str = walker.schema_str(); - let input_str = walker.input_str(); - - let mut schema_cursor = walker.schema_cursor().clone(); - let mut input_cursor = walker.input_cursor().clone(); - - compare_node_kinds_check!(schema_cursor, input_cursor, schema_str, input_str, result); - - if let Err(error) = ensure_at_link_start(&mut input_cursor) { - result.add_error(error); - return result; - } - - if let Err(error) = ensure_at_link_start(&mut schema_cursor) { - result.add_error(error); - return result; - } - - let link_input_cursor = input_cursor.clone(); - - if !schema_cursor.goto_first_child() || !input_cursor.goto_first_child() { - #[cfg(feature = "invariant_violations")] - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "link nodes must have children" + let mut result = + ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); + + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); + + compare_node_kinds_check!( + schema_cursor, + input_cursor, + walker.schema_str(), + walker.input_str(), + result ); - } - compare_node_kinds_check!(schema_cursor, input_cursor, schema_str, input_str, result); + if let Err(error) = ensure_at_link_start(&mut input_cursor) { + result.add_error(error); + return result; + } - // We're now at the alt - // - // ├─ (text[4]1..10) - // └─ 
(link[5]10..31) - // ├─ ([6]11..15) - // │ └─ (text[7]11..15) - // └─ ([8]17..30) - // └─ (text[9]17..30) - #[cfg(feature = "invariant_violations")] - if !both_are_link_description_nodes(&schema_cursor.node(), &input_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "we should be at link text, but at {:?}", - input_cursor.node().kind() - ); - } + if let Err(error) = ensure_at_link_start(&mut schema_cursor) { + result.add_error(error); + return result; + } - let child_result = compare_link_child_text( - &schema_cursor, - &input_cursor, - schema_str, - input_str, - got_eof, - ); - result.join_other_result(&child_result); - if child_result.has_errors() { - return result; - } + let link_input_cursor = input_cursor.clone(); - if let Some(pos) = link_child_pos(&schema_cursor, &input_cursor) { - result.keep_farther_pos(&pos); - } + if !schema_cursor.goto_first_child() || !input_cursor.goto_first_child() { + #[cfg(feature = "invariant_violations")] + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "link nodes must have children" + ); + } - #[cfg(feature = "invariant_violations")] - if !schema_cursor.goto_next_sibling() || !input_cursor.goto_next_sibling() { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "link nodes must have a destination" + compare_node_kinds_check!( + schema_cursor, + input_cursor, + walker.schema_str(), + walker.input_str(), + result ); - } - compare_node_kinds_check!(schema_cursor, input_cursor, schema_str, input_str, result); + // We're now at the alt + // + // ├─ (text[4]1..10) + // └─ (link[5]10..31) + // ├─ ([6]11..15) + // │ └─ (text[7]11..15) + // └─ ([8]17..30) + // └─ (text[9]17..30) + #[cfg(feature = "invariant_violations")] + if !both_are_link_description_nodes(&schema_cursor.node(), &input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "we should be at link text, but at {:?}", + input_cursor.node().kind() + ); + } - if 
is_link_destination_node(&schema_cursor.node()) { - let destination_result = validate_link_destination( - &schema_cursor, - &input_cursor, - schema_str, - input_str, - got_eof, - ); - result.join_other_result(&destination_result); - // Don't return early since we want to move the cursor (20 lines down) first - } else { let child_result = compare_link_child_text( &schema_cursor, &input_cursor, - schema_str, - input_str, + walker.schema_str(), + walker.input_str(), got_eof, ); result.join_other_result(&child_result); if child_result.has_errors() { return result; } - } - if !waiting_at_end(got_eof, input_str, &link_input_cursor) - && let Some(pos) = link_child_pos(&schema_cursor, &input_cursor) - { - result.keep_farther_pos(&pos); - } else { - result.sync_cursor_pos(&schema_cursor, &input_cursor); - } + if let Some(pos) = link_child_pos(&schema_cursor, &input_cursor) { + result.keep_farther_pos(&pos); + } - result + #[cfg(feature = "invariant_violations")] + if !schema_cursor.goto_next_sibling() || !input_cursor.goto_next_sibling() { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "link nodes must have a destination" + ); + } + + compare_node_kinds_check!( + schema_cursor, + input_cursor, + walker.schema_str(), + walker.input_str(), + result + ); + + if is_link_destination_node(&schema_cursor.node()) { + let destination_result = validate_link_destination( + &schema_cursor, + &input_cursor, + walker.schema_str(), + walker.input_str(), + got_eof, + ); + result.join_other_result(&destination_result); + // Don't return early since we want to move the cursor (20 lines down) first + } else { + let child_result = compare_link_child_text( + &schema_cursor, + &input_cursor, + walker.schema_str(), + walker.input_str(), + got_eof, + ); + result.join_other_result(&child_result); + if child_result.has_errors() { + return result; + } + } + + if !waiting_at_end(got_eof, walker.input_str(), &link_input_cursor) + && let Some(pos) = link_child_pos(&schema_cursor, 
&input_cursor) + { + result.keep_farther_pos(&pos); + } else { + result.sync_cursor_pos(&schema_cursor, &input_cursor); + } + + result + } } fn ensure_at_link_start(cursor: &mut TreeCursor) -> Result<(), ValidationError> { @@ -323,11 +336,9 @@ fn link_child_pos(schema_cursor: &TreeCursor, input_cursor: &TreeCursor) -> Opti mod tests { use serde_json::json; - use crate::mdschema::validator::{ - node_pos_pair::NodePosPair, node_walker::validators::test_utils::ValidatorTester, - }; - + use super::super::test_utils::ValidatorTester; use super::LinkVsLinkValidator; + use crate::mdschema::validator::node_pos_pair::NodePosPair; #[test] fn test_validate_link_vs_link_literal() { diff --git a/src/mdschema/validator/node_walker/validators/lists.rs b/src/mdschema/validator/node_walker/validators/lists.rs index f887887..a473345 100644 --- a/src/mdschema/validator/node_walker/validators/lists.rs +++ b/src/mdschema/validator/node_walker/validators/lists.rs @@ -1,22 +1,29 @@ +//! List validator for node-walker comparisons. +//! +//! Types: +//! - `ListVsListValidator`: validates list structure and list item content, +//! including nested lists and matcher-aware text comparisons. 
#[cfg(feature = "invariant_violations")] -use crate::mdschema::validator::{ts_types::is_list_item_node, validator_walker::ValidatorWalker}; +use crate::mdschema::validator::validator_walker::ValidatorWalker; use crate::mdschema::validator::{ errors::MalformedStructureKind, matcher::matcher::{Matcher, MatcherError}, node_walker::{ ValidationResult, validators::{ - Validator, ValidatorImpl, - textual_container::TextualContainerVsTextualContainerValidator, + Validator, ValidatorImpl, containers::TextualContainerVsTextualContainerValidator, }, }, - ts_types::{both_are_list_items, both_are_markers, both_are_paragraphs, is_list_node}, - ts_utils::{count_siblings, get_node_and_next_node, get_node_text, has_single_code_child, has_subsequent_node_of_kind, waiting_at_end}, + ts_types::*, + ts_utils::{ + count_siblings, get_node_and_next_node, get_node_text, has_single_code_child, + has_subsequent_node_of_kind, waiting_at_end, + }, }; use crate::{ invariant_violation, - mdschema::validator::{ - errors::{ChildrenCount, SchemaError, SchemaViolationError, ValidationError}, + mdschema::validator::errors::{ + ChildrenCount, SchemaError, SchemaViolationError, ValidationError, }, }; use log::trace; @@ -85,419 +92,424 @@ pub(super) struct ListVsListValidator; impl ValidatorImpl for ListVsListValidator { fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - validate_list_vs_list_impl(walker, got_eof) - } -} - -fn validate_list_vs_list_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); - - let schema_str = walker.schema_str(); - let input_str = walker.input_str(); - - let mut schema_cursor = walker.schema_cursor().clone(); - let mut input_cursor = walker.input_cursor().clone(); + let mut result = + ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); - // We want to ensure that the types of lists are the same - 
compare_node_kinds_check!(schema_cursor, input_cursor, schema_str, input_str, result); - - let at_list_schema_cursor = schema_cursor.clone(); - let at_list_input_cursor = input_cursor.clone(); - - match ( - ensure_at_first_list_item(&mut input_cursor), - ensure_at_first_list_item(&mut schema_cursor), - ) { - (Ok(_), Ok(_)) => {} - (Err(_), Ok(_)) if waiting_at_end(got_eof, walker.input_str(), &input_cursor) => { - // Not ready yet, but that's OK! - result.sync_cursor_pos(&schema_cursor, &input_cursor); - return result; - } - _ => {} // we'll deal with the mismatch later in `validate_list_item_contents_vs_list_item_contents` - } + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); - #[cfg(feature = "invariant_violations")] - if !is_list_item_node(&schema_cursor.node()) || !is_list_item_node(&input_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "expected list_item nodes after list traversal" + // We want to ensure that the types of lists are the same + compare_node_kinds_check!( + schema_cursor, + input_cursor, + walker.schema_str(), + walker.input_str(), + result ); - } - match extract_repeated_matcher_from_list_item(&schema_cursor, schema_str) { - // We were able to find a valid repeated matcher in the schema list item. - Some(Ok(matcher)) => { - let min_items = matcher.extras().min_items().unwrap_or(0); - let max_items = matcher.extras().max_items(); - trace!( - "Found repeated matcher: id={:?}, min_items={}, max_items={:?}, variable_length={}", - matcher.id(), - min_items, - max_items, - matcher.variable_length() - ); - - // We don't let you have a variable length matcher where there are more list elements in the schema. 
- if matcher.variable_length() && has_subsequent_node_of_kind(&schema_cursor, "list_item") - { - trace!("Error: Variable length matcher found with subsequent schema list items"); - result.add_error(ValidationError::SchemaError( - SchemaError::RepeatingMatcherUnbounded { - schema_index: schema_cursor.descendant_index(), - }, - )); + let at_list_schema_cursor = schema_cursor.clone(); + let at_list_input_cursor = input_cursor.clone(); + + match ( + ensure_at_first_list_item(&mut input_cursor), + ensure_at_first_list_item(&mut schema_cursor), + ) { + (Ok(_), Ok(_)) => {} + (Err(_), Ok(_)) if waiting_at_end(got_eof, walker.input_str(), &input_cursor) => { + // Not ready yet, but that's OK! + result.sync_cursor_pos(&schema_cursor, &input_cursor); return result; } + _ => {} // we'll deal with the mismatch later in `validate_list_item_contents_vs_list_item_contents` + } - let mut values_at_level = Vec::with_capacity(max_items.unwrap_or(1)); - let mut validate_so_far = 0; + #[cfg(feature = "invariant_violations")] + if !is_list_item_node(&schema_cursor.node()) || !is_list_item_node(&input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "expected list_item nodes after list traversal" + ); + } - loop { - trace!("Validating list item #{}", validate_so_far + 1,); + match extract_repeated_matcher_from_list_item(&schema_cursor, walker.schema_str()) { + // We were able to find a valid repeated matcher in the schema list item. 
+ Some(Ok(matcher)) => { + let min_items = matcher.extras().min_items().unwrap_or(0); + let max_items = matcher.extras().max_items(); + trace!( + "Found repeated matcher: id={:?}, min_items={}, max_items={:?}, variable_length={}", + matcher.id(), + min_items, + max_items, + matcher.variable_length() + ); - #[cfg(feature = "invariant_violations")] - if input_cursor.node().kind() != "list_item" - || schema_cursor.node().kind() != "list_item" + // We don't let you have a variable length matcher where there are more list elements in the schema. + if matcher.variable_length() + && has_subsequent_node_of_kind(&schema_cursor, "list_item") { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "expected list_item nodes while validating repeated matcher" + trace!( + "Error: Variable length matcher found with subsequent schema list items" ); + result.add_error(ValidationError::SchemaError( + SchemaError::RepeatingMatcherUnbounded { + schema_index: schema_cursor.descendant_index(), + }, + )); + return result; } - let (new_matches, early_return) = validate_list_item_contents_vs_list_item_contents( - &schema_cursor, - &input_cursor, - schema_str, - input_str, - got_eof, - ); + let mut values_at_level = Vec::with_capacity(max_items.unwrap_or(1)); + let mut validate_so_far = 0; - let has_errors = new_matches.has_errors(); - validate_so_far += 1; - values_at_level.push(new_matches.value().clone()); - result.join_errors(new_matches.errors()); - if early_return || has_errors { - return result; - } + loop { + trace!("Validating list item #{}", validate_so_far + 1,); - trace!( - "Completed validation of list item #{}, moving to next", - validate_so_far - ); + #[cfg(feature = "invariant_violations")] + if input_cursor.node().kind() != "list_item" + || schema_cursor.node().kind() != "list_item" + { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "expected list_item nodes while validating repeated matcher" + ); + } + + let (new_matches, 
early_return) = + validate_list_item_contents_vs_list_item_contents( + &schema_cursor, + &input_cursor, + walker.schema_str(), + walker.input_str(), + got_eof, + ); + + let has_errors = new_matches.has_errors(); + validate_so_far += 1; + values_at_level.push(new_matches.value().clone()); + result.join_errors(new_matches.errors()); + if early_return || has_errors { + return result; + } - // If we've now validated the max number of items, check if there are more - if let Some(max_items) = max_items - && validate_so_far == max_items - { trace!( - "Reached max items limit ({}), checking if there are more items", - max_items + "Completed validation of list item #{}, moving to next", + validate_so_far ); - // Check if there are more items beyond the max - if input_cursor.clone().goto_next_sibling() - && !schema_cursor.clone().goto_next_sibling() + // If we've now validated the max number of items, check if there are more + if let Some(max_items) = max_items + && validate_so_far == max_items { - // There are more input items and no schema sibling to handle them - // Report error immediately - extra items won't disappear trace!( - "Error: More items than max allowed ({} > {}), early exit", - "at least one more", max_items + "Reached max items limit ({}), checking if there are more items", + max_items ); - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - expected: ChildrenCount::from_range(min_items, Some(max_items)), - actual: validate_so_far + 1, // At least one more - }, - )); - // Early exit - no more schema items to handle the extras + + // Check if there are more items beyond the max + if input_cursor.clone().goto_next_sibling() + && !schema_cursor.clone().goto_next_sibling() + { + // There are more input items and no schema sibling to handle them + // Report error immediately - extra items won't disappear + trace!( + 
"Error: More items than max allowed ({} > {}), early exit", + "at least one more", max_items + ); + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::ChildrenLengthMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: ChildrenCount::from_range(min_items, Some(max_items)), + actual: validate_so_far + 1, // At least one more + }, + )); + // Early exit - no more schema items to handle the extras + break; + } break; } - break; - } - // Otherwise move to the next sibling, or break if there are none left - if !input_cursor.goto_next_sibling() { - break; + // Otherwise move to the next sibling, or break if there are none left + if !input_cursor.goto_next_sibling() { + break; + } } - } - - // Check if we validated enough items - if validate_so_far < min_items && got_eof { - trace!( - "Error: Not enough items validated ({} < {}) and at EOF", - validate_so_far, min_items - ); - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - expected: ChildrenCount::from_range(min_items, max_items), - actual: validate_so_far, - }, - )); - } - - // If we didn't make it to the end of the input list, there - // might be more items but that correspond to another matcher. - // - // For example, with a schema like: - // - // ```md - // - `testA:/test\d/`{2,2} - // - `testB:/line2test\d/`{2,2} - // ``` - // - // And input like: - // - // ```md - // - test1 - // - test2 - // - line2test1 - // - line2test2 - // ``` - // - // We want to validate the first two, pushing them into our - // list, and then the second two. 
- // - // { "testA": ["test1", "test2"], - // "testB": ["line2test1", "line2test2"] } - // - // In these cases we are looking at an schema tree that looks like: - // - // (tight_list) - // ├── (list_item) <-- where we are now - // │ ├── (list_marker) - // │ └── (paragraph) - // │ ├── (code_span) - // │ │ └── (text) - // │ └── (text) - // └── (list_item) <-- where we are after .goto_next_sibling() when it returns true - // ├── (list_marker) - // └── (paragraph) - // ├── (code_span) - // │ └── (text) - // └── (text) - // - // If there are more items to validate AT THE SAME LEVEL, recurse to - // validate them. We now use the *next* schema node too. - if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { - let next_result = ListVsListValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); - result.join_other_result(&next_result); - } - - trace!("Completed validation of all {} list items", validate_so_far); - // Now, if there's another pair, recurse and validate it - if schema_cursor.goto_first_child() && input_cursor.goto_first_child() { - while schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() {} - - // There is a deeper list! 
- if is_list_node(&schema_cursor.node()) && is_list_node(&input_cursor.node()) { + // Check if we validated enough items + if validate_so_far < min_items && got_eof { trace!( - "Found next sibling pairs, recursing to validate next list elements; cursors are at {:?} and {:?}", - input_cursor.node().kind(), - schema_cursor.node().kind() + "Error: Not enough items validated ({} < {}) and at EOF", + validate_so_far, min_items ); + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::ChildrenLengthMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: ChildrenCount::from_range(min_items, max_items), + actual: validate_so_far, + }, + )); + } + // If we didn't make it to the end of the input list, there + // might be more items but that correspond to another matcher. + // + // For example, with a schema like: + // + // ```md + // - `testA:/test\d/`{2,2} + // - `testB:/line2test\d/`{2,2} + // ``` + // + // And input like: + // + // ```md + // - test1 + // - test2 + // - line2test1 + // - line2test2 + // ``` + // + // We want to validate the first two, pushing them into our + // list, and then the second two. + // + // { "testA": ["test1", "test2"], + // "testB": ["line2test1", "line2test2"] } + // + // In these cases we are looking at an schema tree that looks like: + // + // (tight_list) + // ├── (list_item) <-- where we are now + // │ ├── (list_marker) + // │ └── (paragraph) + // │ ├── (code_span) + // │ │ └── (text) + // │ └── (text) + // └── (list_item) <-- where we are after .goto_next_sibling() when it returns true + // ├── (list_marker) + // └── (paragraph) + // ├── (code_span) + // │ └── (text) + // └── (text) + // + // If there are more items to validate AT THE SAME LEVEL, recurse to + // validate them. We now use the *next* schema node too. 
+ if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { let next_result = ListVsListValidator::validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); - // We need to be able to capture errors that happen in the recursive call - result.join_errors(next_result.errors()); - values_at_level.push(next_result.value().clone()); + result.join_other_result(&next_result); } - } else { - trace!("No more sibling pairs found"); - } - // Store the array that we just gathered - if let Some(matcher_id) = matcher.id() { - trace!("Storing matches for matcher id: {}", matcher_id); - - result.set_match( - matcher_id, - json!( - values_at_level - .iter() - .map(|value| { - // If we have a schema: - // - // ```md - // - `name:/test\d/`{2,2} - // - `name:/test\d/`{1,1} - // ``` - // - // Initially, we run this at the top level, gather something like - // - // matches_at_level = [{ "test": "test1" }, { "test": "test2" }] - // - // Then we might recurse, and end up with something like - // - // matches_at_level = [{ "test": "test1" }, { "test": "test2" }, { "deep": "test3" }] - // - // Then we iterate over the matches_at_level and unpack all the ones that have our - // id (we are top level), so "test," and get - // - // matches_at_level = ["test1", "test2", { "deep": "test3" }] - // - // Note that we don't unpack anything that is not our id (see below, where we - // "don't unpack!"). - - let mut matches_as_obj = value.as_object().unwrap().clone(); - - // TODO: can we avoid these clones? - if let Some(matcher_id) = matcher.id() { - let match_for_same_id = matches_as_obj.remove(matcher_id); - - // Unwrap it to be loose in the array if we can - match match_for_same_id { - Some(match_for_same_id) => match_for_same_id, - None => value.clone(), // don't unpack! 
- } - } else { - value.clone() - } - }) - .collect::>() - ), - ); - } + trace!("Completed validation of all {} list items", validate_so_far); - // Now we have validated as many as we could, let's add it to the result. - // Update the cursors to be as far as we got, and then join the results. - trace!("Returning validation result for repeated matcher"); - return result; - } - // We were able to find a matcher in the schema list item, but it was invalid (we failed to parse it). - Some(Err(e)) => { - trace!("Error: Found invalid matcher in schema list item: {:?}", e); - result.add_error(ValidationError::SchemaError(SchemaError::MatcherError { - error: e, - schema_index: schema_cursor.descendant_index(), - })); - } - // We didn't find a repeating matcher. In this case, just use validate the insides directly and move on. - None => { - trace!( - "No repeated matcher found, using textual validation. Current node kinds: {:?} and {:?}", - input_cursor.node().kind(), - schema_cursor.node().kind() - ); + // Now, if there's another pair, recurse and validate it + if schema_cursor.goto_first_child() && input_cursor.goto_first_child() { + while schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() {} - // In this case we want to make sure that the children have the - // exact same length, since they are both literal lists. Dynamic - // lengths aren't allowed for literal lists. 
- let remaining_schema_nodes = count_siblings(&schema_cursor); - let literal_chunk_count = count_next_n_literal_lists(&schema_cursor, schema_str); - let remaining_input_nodes = count_siblings(&input_cursor); - if remaining_schema_nodes != remaining_input_nodes { - let available_literal_items = remaining_input_nodes + 1; - - if available_literal_items < literal_chunk_count { - if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { - // Don't care for now - return result; - } else { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { - schema_index: at_list_schema_cursor.descendant_index(), - input_index: at_list_input_cursor.descendant_index(), - // +1 because we need to include this first node that we are currently on - expected: ChildrenCount::from_specific(literal_chunk_count), - actual: available_literal_items, - }, - )); - return result; + // There is a deeper list! + if is_list_node(&schema_cursor.node()) && is_list_node(&input_cursor.node()) { + trace!( + "Found next sibling pairs, recursing to validate next list elements; cursors are at {:?} and {:?}", + input_cursor.node().kind(), + schema_cursor.node().kind() + ); + + let next_result = ListVsListValidator::validate( + &walker.with_cursors(&schema_cursor, &input_cursor), + got_eof, + ); + // We need to be able to capture errors that happen in the recursive call + result.join_errors(next_result.errors()); + values_at_level.push(next_result.value().clone()); } + } else { + trace!("No more sibling pairs found"); } - } - if remaining_schema_nodes != remaining_input_nodes - && literal_chunk_count == remaining_schema_nodes + 1 - { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { - schema_index: at_list_schema_cursor.descendant_index(), - input_index: at_list_input_cursor.descendant_index(), - // +1 because we need to include this first node that we are currently on - expected: 
ChildrenCount::from_specific(remaining_schema_nodes + 1), - actual: remaining_input_nodes + 1, - }, - )); + // Store the array that we just gathered + if let Some(matcher_id) = matcher.id() { + trace!("Storing matches for matcher id: {}", matcher_id); + + result.set_match( + matcher_id, + json!( + values_at_level + .iter() + .map(|value| { + // If we have a schema: + // + // ```md + // - `name:/test\d/`{2,2} + // - `name:/test\d/`{1,1} + // ``` + // + // Initially, we run this at the top level, gather something like + // + // matches_at_level = [{ "test": "test1" }, { "test": "test2" }] + // + // Then we might recurse, and end up with something like + // + // matches_at_level = [{ "test": "test1" }, { "test": "test2" }, { "deep": "test3" }] + // + // Then we iterate over the matches_at_level and unpack all the ones that have our + // id (we are top level), so "test," and get + // + // matches_at_level = ["test1", "test2", { "deep": "test3" }] + // + // Note that we don't unpack anything that is not our id (see below, where we + // "don't unpack!"). + + let mut matches_as_obj = value.as_object().unwrap().clone(); + + // TODO: can we avoid these clones? + if let Some(matcher_id) = matcher.id() { + let match_for_same_id = matches_as_obj.remove(matcher_id); + + // Unwrap it to be loose in the array if we can + match match_for_same_id { + Some(match_for_same_id) => match_for_same_id, + None => value.clone(), // don't unpack! + } + } else { + value.clone() + } + }) + .collect::>() + ), + ); + } + + // Now we have validated as many as we could, let's add it to the result. + // Update the cursors to be as far as we got, and then join the results. 
+ trace!("Returning validation result for repeated matcher"); return result; } - - let (list_item_match_result, early_return) = - validate_list_item_contents_vs_list_item_contents( - &schema_cursor, - &input_cursor, - schema_str, - input_str, - got_eof, + // We were able to find a matcher in the schema list item, but it was invalid (we failed to parse it). + Some(Err(e)) => { + trace!("Error: Found invalid matcher in schema list item: {:?}", e); + result.add_error(ValidationError::SchemaError(SchemaError::MatcherError { + error: e, + schema_index: schema_cursor.descendant_index(), + })); + } + // We didn't find a repeating matcher. In this case, just use validate the insides directly and move on. + None => { + trace!( + "No repeated matcher found, using textual validation. Current node kinds: {:?} and {:?}", + input_cursor.node().kind(), + schema_cursor.node().kind() ); - result.join_other_result(&list_item_match_result); - if early_return || list_item_match_result.has_errors() { - return result; - } + // In this case we want to make sure that the children have the + // exact same length, since they are both literal lists. Dynamic + // lengths aren't allowed for literal lists. 
+ let remaining_schema_nodes = count_siblings(&schema_cursor); + let literal_chunk_count = + count_next_n_literal_lists(&schema_cursor, walker.schema_str()); + let remaining_input_nodes = count_siblings(&input_cursor); + if remaining_schema_nodes != remaining_input_nodes { + let available_literal_items = remaining_input_nodes + 1; + + if available_literal_items < literal_chunk_count { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // Don't care for now + return result; + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::ChildrenLengthMismatch { + schema_index: at_list_schema_cursor.descendant_index(), + input_index: at_list_input_cursor.descendant_index(), + // +1 because we need to include this first node that we are currently on + expected: ChildrenCount::from_specific(literal_chunk_count), + actual: available_literal_items, + }, + )); + return result; + } + } + } - { - // Recurse down into the next list if there is one - let mut schema_cursor = schema_cursor.clone(); - let mut input_cursor = input_cursor.clone(); + if remaining_schema_nodes != remaining_input_nodes + && literal_chunk_count == remaining_schema_nodes + 1 + { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::ChildrenLengthMismatch { + schema_index: at_list_schema_cursor.descendant_index(), + input_index: at_list_input_cursor.descendant_index(), + // +1 because we need to include this first node that we are currently on + expected: ChildrenCount::from_specific(remaining_schema_nodes + 1), + actual: remaining_input_nodes + 1, + }, + )); + return result; + } - input_cursor.goto_last_child(); - schema_cursor.goto_last_child(); + let (list_item_match_result, early_return) = + validate_list_item_contents_vs_list_item_contents( + &schema_cursor, + &input_cursor, + walker.schema_str(), + walker.input_str(), + got_eof, + ); + result.join_other_result(&list_item_match_result); - compare_node_kinds_check!( - schema_cursor, - 
input_cursor, - schema_str, - input_str, - result - ); + if early_return || list_item_match_result.has_errors() { + return result; + } - if is_list_node(&input_cursor.node()) { - // and we know that schema is the same - input_cursor.goto_first_child(); - schema_cursor.goto_first_child(); + { + // Recurse down into the next list if there is one + let mut schema_cursor = schema_cursor.clone(); + let mut input_cursor = input_cursor.clone(); + + input_cursor.goto_last_child(); + schema_cursor.goto_last_child(); + + compare_node_kinds_check!( + schema_cursor, + input_cursor, + walker.schema_str(), + walker.input_str(), + result + ); + + if is_list_node(&input_cursor.node()) { + // and we know that schema is the same + input_cursor.goto_first_child(); + schema_cursor.goto_first_child(); + + let deeper_result = ListVsListValidator::validate( + &walker.with_cursors(&schema_cursor, &input_cursor), + got_eof, + ); + result.join_other_result(&deeper_result); + } + } - let deeper_result = ListVsListValidator::validate( + // Recurse on next sibling if available! + if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { + trace!("Moving to next sibling list items for continued validation"); + let new_matches = ListVsListValidator::validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); - result.join_other_result(&deeper_result); + result.join_other_result(&new_matches); + } else { + trace!("No more sibling pairs found, validation complete"); } } - - // Recurse on next sibling if available! 
- if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { - trace!("Moving to next sibling list items for continued validation"); - let new_matches = ListVsListValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); - result.join_other_result(&new_matches); - } else { - trace!("No more sibling pairs found, validation complete"); - } } - } - result + result + } } /// Walk forward and see how many lists after this one at the same level are literal lists. @@ -653,8 +665,7 @@ fn try_from_code_and_text_node( ) -> Result { let matcher_text = get_node_text(&matcher_node, schema_str); - let suffix_text = suffix_node - .map(|node| get_node_text(&node, schema_str)); + let suffix_text = suffix_node.map(|node| get_node_text(&node, schema_str)); Matcher::try_from_pattern_and_suffix_str(matcher_text, suffix_text) } @@ -759,20 +770,17 @@ mod tests { use serde_json::json; - use super::ListVsListValidator; + use super::super::test_utils::ValidatorTester; + use super::{ + ListVsListValidator, ensure_at_first_list_item, extract_repeated_matcher_from_list_item, + }; use crate::mdschema::validator::{ errors::{ ChildrenCount, MalformedStructureKind, NodeContentMismatchKind, SchemaViolationError, ValidationError, }, - node_walker::{ - ValidationResult, - validators::{ - lists::{ensure_at_first_list_item, extract_repeated_matcher_from_list_item}, - test_utils::ValidatorTester, - }, - }, - ts_types::both_are_list_nodes, + node_walker::ValidationResult, + ts_types::*, ts_utils::parse_markdown, }; @@ -921,7 +929,9 @@ mod tests { let input_str = "- test1\n- different"; let result = validate_lists(schema_str, input_str, false); - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { kind: NodeContentMismatchKind::Literal, schema_index: 9, @@ -939,7 +949,9 @@ mod tests { let input_str = "- test1\n - nested_different"; 
let result = validate_lists(schema_str, input_str, false); - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { kind: NodeContentMismatchKind::Literal, schema_index: 10, @@ -1010,7 +1022,11 @@ Footer: test (footer isn't validated with_list_vs_list) "#; let result = validate_lists(schema_str, input_str, false); - assert!(result.errors().is_empty(), "Expected no errors, got: {:?}", result.errors()); + assert!( + result.errors().is_empty(), + "Expected no errors, got: {:?}", + result.errors() + ); assert_eq!(result.value(), &json!({"id": "test2"})); } @@ -1036,7 +1052,11 @@ Footer: test (footer isn't validated with_list_vs_list) .peek_nodes(|(s, i)| assert!(both_are_list_nodes(s, i))) .validate_complete(); - assert!(result.errors().is_empty(), "Expected no errors, got: {:?}", result.errors()); + assert!( + result.errors().is_empty(), + "Expected no errors, got: {:?}", + result.errors() + ); assert_eq!(result.value(), &json!({"item": ["test1", "test2"]})); } @@ -1062,7 +1082,11 @@ Footer: test (footer isn't validated with_list_vs_list) "#; let result = validate_lists(schema_str, input_str, true); - assert!(result.errors().is_empty(), "Expected no errors, got: {:?}", result.errors()); + assert!( + result.errors().is_empty(), + "Expected no errors, got: {:?}", + result.errors() + ); assert_eq!( *result.value(), json!({"matcher1": ["match1_1"], "matcher2": ["match2_1"]}) @@ -1083,7 +1107,9 @@ Footer: test (footer isn't validated with_list_vs_list) "#; let result = validate_lists(schema_str, input_str, false); - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { kind: NodeContentMismatchKind::Literal, schema_index: 9, @@ -1119,7 +1145,9 @@ Footer: test (footer isn't validated with_list_vs_list) let result = 
validate_lists(schema_str, input_str, false); assert_eq!(result.value(), &json!({})); - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::ChildrenLengthMismatch { schema_index: 1, input_index: 1, @@ -1150,7 +1178,9 @@ Footer: test (footer isn't validated with_list_vs_list) let result = validate_lists(schema_str, input_str, true); assert_eq!(result.value(), &json!({})); // we stop early. TODO: capture as much as we can - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::ChildrenLengthMismatch { schema_index: 1, input_index: 1, @@ -1207,7 +1237,9 @@ Footer: test (footer isn't validated with_list_vs_list) // even with eof=false we should know that there is an error by now // Single error: test2 doesn't match testB's pattern - we return early on mismatch - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { kind: NodeContentMismatchKind::Matcher, schema_index: 11, @@ -1237,7 +1269,10 @@ Footer: test (footer isn't validated with_list_vs_list) let result = validate_lists(schema_str, input_str, true); assert!(result.errors().is_empty()); - assert_eq!(result.value(), &json!({"test": ["test1", {"deep": ["deep1"]}]})); + assert_eq!( + result.value(), + &json!({"test": ["test1", {"deep": ["deep1"]}]}) + ); } #[test] @@ -1372,7 +1407,9 @@ Footer: test (footer isn't validated with_list_vs_list) "#; let result = validate_lists(schema_str, input_str, false); - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: 5, input_index: 5, @@ -1407,7 +1444,9 @@ Footer: test (footer isn't validated with_list_vs_list) 
"#; let result = validate_lists(schema_str, input_str, true); - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::MalformedNodeStructure { schema_index: 3, input_index: 4, @@ -1427,7 +1466,9 @@ Footer: test (footer isn't validated with_list_vs_list) "#; let result = validate_lists(schema_str, input_str, true); - assert_eq!(result.errors(), &[ValidationError::SchemaViolation( + assert_eq!( + result.errors(), + &[ValidationError::SchemaViolation( SchemaViolationError::MalformedNodeStructure { schema_index: 4, input_index: 3, @@ -1499,7 +1540,10 @@ Footer: test (footer isn't validated with_list_vs_list) let result = validate_lists(schema_str, input_str, true); assert!(result.errors().is_empty()); - assert_eq!(result.value(), &json!({"test": ["test1", "test2", "test3"]})); + assert_eq!( + result.value(), + &json!({"test": ["test1", "test2", "test3"]}) + ); // Negative case: below minimum let schema_str = r#" @@ -1577,8 +1621,14 @@ Footer: test (footer isn't validated with_list_vs_list) "#; let result = validate_lists(schema_str, input_str, true); - assert!(result.errors().is_empty(), "Expected no errors when list meets minimum"); - assert_eq!(result.value(), &json!({"test": ["test1", "test2", "test3", "test4"]})); + assert!( + result.errors().is_empty(), + "Expected no errors when list meets minimum" + ); + assert_eq!( + result.value(), + &json!({"test": ["test1", "test2", "test3", "test4"]}) + ); // Negative case: below minimum let schema_str = r#" @@ -1612,7 +1662,10 @@ Footer: test (footer isn't validated with_list_vs_list) "#; let result = validate_lists(schema_str, input_str, true); - assert!(result.errors().is_empty(), "Expected no errors for unlimited matcher"); + assert!( + result.errors().is_empty(), + "Expected no errors for unlimited matcher" + ); assert_eq!( *result.value(), json!({"test": ["test1", "test2", "test3", "test4", "test5"]}) @@ -1780,7 
+1833,9 @@ Footer: test (footer isn't validated with_list_vs_list) let result = validate_lists(schema_str, input_str, false); // Should have no errors when got_eof=false - we're waiting for more input - assert_eq!(result.errors(), &[], + assert_eq!( + result.errors(), + &[], "Should not report error when streaming and only marker received" ); } diff --git a/src/mdschema/validator/node_walker/validators/matchers.rs b/src/mdschema/validator/node_walker/validators/matchers.rs index 4a1e98b..a1cca40 100644 --- a/src/mdschema/validator/node_walker/validators/matchers.rs +++ b/src/mdschema/validator/node_walker/validators/matchers.rs @@ -1,4 +1,9 @@ #![allow(dead_code)] +//! Matcher validators for schema text. +//! +//! Types: +//! - `MatcherVsTextValidator`: handles pattern matching and capture logic used +//! when schema nodes embed matcher syntax inside textual content. use log::trace; use serde_json::json; @@ -13,8 +18,8 @@ use crate::mdschema::validator::matcher::matcher_extras::get_after_extras; use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::helpers::compare_text_contents::compare_text_contents; -use crate::mdschema::validator::node_walker::validators::ValidatorImpl; -use crate::mdschema::validator::ts_types::{is_inline_code_node, is_text_node}; +use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; +use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::{ get_next_node, get_node_n_nodes_ahead, get_node_text, waiting_at_end, }; @@ -26,132 +31,130 @@ pub(super) struct MatcherVsTextValidator; impl ValidatorImpl for MatcherVsTextValidator { fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - validate_matcher_vs_text_impl(walker, got_eof) - } -} - -pub(super) struct TextualVsMatcherValidator; - -impl ValidatorImpl for TextualVsMatcherValidator { - fn 
validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - validate_matcher_vs_text_impl(walker, got_eof) - } -} - -pub(super) struct LiteralMatcherVsTextualValidator; - -impl ValidatorImpl for LiteralMatcherVsTextualValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - validate_literal_matcher_vs_textual( - walker.schema_cursor(), - walker.input_cursor(), - walker.schema_str(), - walker.input_str(), - got_eof, - ) - } -} - -fn validate_matcher_vs_text_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); - - let schema_str = walker.schema_str(); - let input_str = walker.input_str(); - - let mut schema_cursor = walker.schema_cursor().clone(); - let mut input_cursor = walker.input_cursor().clone(); - - let schema_cursor_is_code_node = is_inline_code_node(&schema_cursor.node()); - let input_node = input_cursor.node(); - let schema_prefix_node = if schema_cursor_is_code_node { - let mut prev_cursor = schema_cursor.clone(); - if prev_cursor.goto_previous_sibling() && is_text_node(&prev_cursor.node()) { - Some(prev_cursor.node()) + let mut result = + ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); + + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); + + let schema_cursor_is_code_node = is_inline_code_node(&schema_cursor.node()); + let input_node = input_cursor.node(); + let schema_prefix_node = if schema_cursor_is_code_node { + let mut prev_cursor = schema_cursor.clone(); + if prev_cursor.goto_previous_sibling() && is_text_node(&prev_cursor.node()) { + Some(prev_cursor.node()) + } else { + None + } + } else if is_text_node(&schema_cursor.node()) { + Some(schema_cursor.node()) } else { - None - } - } else if is_text_node(&schema_cursor.node()) { - Some(schema_cursor.node()) - } else { - unreachable!( - "only 
should be called with `code_span` or text but got {:?}", - schema_cursor.node() - ) - }; + unreachable!( + "only should be called with `code_span` or text but got {:?}", + schema_cursor.node() + ) + }; - let schema_suffix_node = { - // If there is a prefix and we're at the prefix, this comes two nodes later. - if schema_prefix_node.is_some() && !schema_cursor_is_code_node { - get_node_n_nodes_ahead(&schema_cursor, 2) - } else { - get_next_node(&schema_cursor) - } - }; + let schema_suffix_node = { + // If there is a prefix and we're at the prefix, this comes two nodes later. + if schema_prefix_node.is_some() && !schema_cursor_is_code_node { + get_node_n_nodes_ahead(&schema_cursor, 2) + } else { + get_next_node(&schema_cursor) + } + }; - let matcher = { - // Make sure we create the matcher when we are pointing at a `code_span` - let mut schema_cursor = schema_cursor.clone(); - if schema_prefix_node.is_some() && !schema_cursor_is_code_node { - schema_cursor.goto_next_sibling(); - } - Matcher::try_from_schema_cursor(&schema_cursor, schema_str) - }; + let matcher = { + // Make sure we create the matcher when we are pointing at a `code_span` + let mut schema_cursor = schema_cursor.clone(); + if schema_prefix_node.is_some() && !schema_cursor_is_code_node { + schema_cursor.goto_next_sibling(); + } + Matcher::try_from_schema_cursor(&schema_cursor, walker.schema_str()) + }; - // How far along we've validated the input. We'll update this as we go - let mut input_byte_offset = input_cursor.node().byte_range().start; + // How far along we've validated the input. 
We'll update this as we go + let mut input_byte_offset = input_cursor.node().byte_range().start; - // Descendant index of the input node, specifically the paragraph (not the interior text) - let input_cursor_descendant_index = input_cursor.descendant_index(); - let input_cursor_at_prefix = input_cursor.clone(); - input_cursor.goto_first_child(); + // Descendant index of the input node, specifically the paragraph (not the interior text) + let input_cursor_descendant_index = input_cursor.descendant_index(); + let input_cursor_at_prefix = input_cursor.clone(); + input_cursor.goto_first_child(); - // Preserve the cursor where it's pointing at the prefix node for error reporting - let mut schema_cursor_at_prefix = schema_cursor.clone(); - if schema_cursor_is_code_node { - let mut prev_cursor = schema_cursor.clone(); - if prev_cursor.goto_previous_sibling() && is_text_node(&prev_cursor.node()) { - schema_cursor_at_prefix = prev_cursor; + // Preserve the cursor where it's pointing at the prefix node for error reporting + let mut schema_cursor_at_prefix = schema_cursor.clone(); + if schema_cursor_is_code_node { + let mut prev_cursor = schema_cursor.clone(); + if prev_cursor.goto_previous_sibling() && is_text_node(&prev_cursor.node()) { + schema_cursor_at_prefix = prev_cursor; + } } - } - schema_cursor_at_prefix.goto_first_child(); + schema_cursor_at_prefix.goto_first_child(); - match at_text_and_next_at_literal_matcher(&schema_cursor, schema_str) { - Ok(Some(true)) => { - let prefix_result = validate_textual_vs_textual_direct( - &input_cursor, - &schema_cursor, - schema_str, - input_str, - got_eof, - ); - result.join_other_result(&prefix_result); - } - Err(error) => { - result.add_error(error); - return result; - } - _ => { - // Only do prefix verification if there is a prefix - if let Some(schema_prefix_node) = schema_prefix_node { - trace!("Validating prefix before matcher"); + match at_text_and_next_at_literal_matcher(&schema_cursor, walker.schema_str()) { + 
Ok(Some(true)) => { + let prefix_result = validate_textual_vs_textual_direct( + &input_cursor, + &schema_cursor, + walker.schema_str(), + walker.input_str(), + got_eof, + ); + result.join_other_result(&prefix_result); + } + Err(error) => { + result.add_error(error); + return result; + } + _ => { + // Only do prefix verification if there is a prefix + if let Some(schema_prefix_node) = schema_prefix_node { + trace!("Validating prefix before matcher"); + + let schema_prefix_str = &walker.schema_str()[schema_prefix_node.byte_range()]; + + // Calculate how much input we have available from the current offset + let input_prefix_len = walker.input_str().len() - input_byte_offset; + + // Check that the input extends enough that we can cover the full prefix. + if input_prefix_len >= schema_prefix_str.len() { + // We have enough input to compare the full prefix + let input_prefix_str = &walker.input_str() + [input_byte_offset..input_byte_offset + schema_prefix_str.len()]; + + // Do the actual prefix comparison + if schema_prefix_str != input_prefix_str { + trace!( + "Prefix mismatch: expected '{}', got '{}'", + schema_prefix_str, input_prefix_str + ); + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor_at_prefix.descendant_index(), + input_index: input_cursor_descendant_index, + expected: schema_prefix_str.into(), + actual: input_prefix_str.into(), + kind: NodeContentMismatchKind::Prefix, + }, + )); - let schema_prefix_str = &schema_str[schema_prefix_node.byte_range()]; + // If prefix validation fails don't try to validate further. + result.sync_cursor_pos(&schema_cursor, &input_cursor); - // Calculate how much input we have available from the current offset - let input_prefix_len = input_str.len() - input_byte_offset; + return result; + } - // Check that the input extends enough that we can cover the full prefix. 
- if input_prefix_len >= schema_prefix_str.len() { - // We have enough input to compare the full prefix - let input_prefix_str = - &input_str[input_byte_offset..input_byte_offset + schema_prefix_str.len()]; + trace!("Prefix matched successfully"); + input_byte_offset += schema_prefix_node.byte_range().len(); + } else if got_eof { + // We've reached EOF, so the input is complete and too short + let input_prefix_str = &walker.input_str()[input_byte_offset..]; - // Do the actual prefix comparison - if schema_prefix_str != input_prefix_str { trace!( - "Prefix mismatch: expected '{}', got '{}'", + "Prefix mismatch (input too short at EOF): expected '{}', got '{}'", schema_prefix_str, input_prefix_str ); + result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: schema_cursor_at_prefix.descendant_index(), @@ -162,222 +165,232 @@ fn validate_matcher_vs_text_impl(walker: &ValidatorWalker, got_eof: bool) -> Val }, )); - // If prefix validation fails don't try to validate further. 
+ result.sync_cursor_pos(&schema_cursor, &input_cursor); + return result; + } else { + // We haven't reached EOF yet, so partial match is OK + // Check if what we have so far matches + let input_prefix_str = &walker.input_str()[input_byte_offset..]; + let schema_prefix_partial = &schema_prefix_str[..input_prefix_str.len()]; + + trace!("Input prefix not long enough, but waiting at end of input"); + + if schema_prefix_partial != input_prefix_str { + trace!( + "Prefix partial mismatch: expected '{}', got '{}'", + schema_prefix_partial, input_prefix_str + ); + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor_at_prefix.descendant_index(), + input_index: input_cursor_descendant_index, + expected: schema_prefix_str.into(), + actual: input_prefix_str.into(), + kind: NodeContentMismatchKind::Prefix, + }, + )); + } + result.sync_cursor_pos(&schema_cursor, &input_cursor); return result; } + } + } + } - trace!("Prefix matched successfully"); - input_byte_offset += schema_prefix_node.byte_range().len(); - } else if got_eof { - // We've reached EOF, so the input is complete and too short - let input_prefix_str = &input_str[input_byte_offset..]; + // Don't validate after the prefix if there isn't enough content + if input_byte_offset >= input_node.byte_range().end { + if got_eof { + let schema_prefix_str = schema_prefix_node + .map(|node| &walker.schema_str()[node.byte_range()]) + .unwrap_or(""); - trace!( - "Prefix mismatch (input too short at EOF): expected '{}', got '{}'", - schema_prefix_str, input_prefix_str - ); + let best_prefix_input_we_can_do = + &walker.input_str()[input_cursor.node().byte_range().start..]; - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::NodeContentMismatch { - schema_index: schema_cursor_at_prefix.descendant_index(), - input_index: input_cursor_descendant_index, - expected: schema_prefix_str.into(), - actual: input_prefix_str.into(), - kind: 
NodeContentMismatchKind::Prefix, - }, - )); + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor_at_prefix.descendant_index(), + input_index: input_cursor_descendant_index, + expected: schema_prefix_str.into(), + actual: best_prefix_input_we_can_do.into(), + kind: NodeContentMismatchKind::Prefix, + }, + )); + } - result.sync_cursor_pos(&schema_cursor, &input_cursor); - return result; - } else { - // We haven't reached EOF yet, so partial match is OK - // Check if what we have so far matches - let input_prefix_str = &input_str[input_byte_offset..]; - let schema_prefix_partial = &schema_prefix_str[..input_prefix_str.len()]; + result.sync_cursor_pos(&schema_cursor, &input_cursor); - trace!("Input prefix not long enough, but waiting at end of input"); + return result; + } - if schema_prefix_partial != input_prefix_str { + // All input that comes after the expected prefix + let input_after_prefix = + walker.input_str()[input_byte_offset..input_cursor.node().byte_range().end].to_string(); + + match matcher { + Ok(matcher) => { + // Actually perform the match for the matcher + match matcher.match_str(&input_after_prefix) { + Some(matched_str) => { trace!( - "Prefix partial mismatch: expected '{}', got '{}'", - schema_prefix_partial, input_prefix_str + "Matcher successfully matched input: '{}' (length={})", + matched_str, + matched_str.len() ); - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::NodeContentMismatch { - schema_index: schema_cursor_at_prefix.descendant_index(), - input_index: input_cursor_descendant_index, - expected: schema_prefix_str.into(), - actual: input_prefix_str.into(), - kind: NodeContentMismatchKind::Prefix, - }, - )); - } - result.sync_cursor_pos(&schema_cursor, &input_cursor); + input_byte_offset += matched_str.len(); - return result; - } - } - } - } + // Good match! 
Add the matched node to the matches (if it has an id) + // + // If we're at the end though, don't add it just yet! + if !waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + if let Some(id) = matcher.id() { + trace!("Storing match for id '{}': '{}'", id, matched_str); + result.set_match(id, json!(matched_str)); + } else { + trace!("Matcher has no id, not storing match"); + } + } - // Don't validate after the prefix if there isn't enough content - if input_byte_offset >= input_node.byte_range().end { - if got_eof { - let schema_prefix_str = schema_prefix_node - .map(|node| &schema_str[node.byte_range()]) - .unwrap_or(""); + // Walk so that we are ON the `code_span` + schema_cursor.goto_next_sibling(); - let best_prefix_input_we_can_do = &input_str[input_cursor.node().byte_range().start..]; + // Walk down into the `code_span` and mark its child text as already validated! + { + let mut schema_cursor = schema_cursor.clone(); - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::NodeContentMismatch { - schema_index: schema_cursor_at_prefix.descendant_index(), - input_index: input_cursor_descendant_index, - expected: schema_prefix_str.into(), - actual: best_prefix_input_we_can_do.into(), - kind: NodeContentMismatchKind::Prefix, - }, - )); - } + schema_cursor.goto_first_child(); - result.sync_cursor_pos(&schema_cursor, &input_cursor); - - return result; - } + // Only dig in if we won't need to rematch again + if !waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + result.keep_farther_pos(&NodePosPair::from_cursors( + &schema_cursor, + &input_cursor, + )); + } + } + } + None => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + return result; + }; - // All input that comes after the expected prefix - let input_after_prefix = - input_str[input_byte_offset..input_cursor.node().byte_range().end].to_string(); + trace!( + "Matcher did not match input string: pattern={}, input='{}'", + matcher.pattern().to_string(), 
+ input_after_prefix + ); - match matcher { - Ok(matcher) => { - // Actually perform the match for the matcher - match matcher.match_str(&input_after_prefix) { - Some(matched_str) => { - trace!( - "Matcher successfully matched input: '{}' (length={})", - matched_str, - matched_str.len() - ); + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor_descendant_index, + expected: matcher.pattern().to_string(), + actual: input_after_prefix.into(), + kind: NodeContentMismatchKind::Matcher, + }, + )); - input_byte_offset += matched_str.len(); - - // Good match! Add the matched node to the matches (if it has an id) - // - // If we're at the end though, don't add it just yet! - if !waiting_at_end(got_eof, walker.input_str(), &input_cursor) { - if let Some(id) = matcher.id() { - trace!("Storing match for id '{}': '{}'", id, matched_str); - result.set_match(id, json!(matched_str)); - } else { - trace!("Matcher has no id, not storing match"); + return result; + } + } + } + Err(error) => match error { + MatcherError::WasLiteralCode => { + // Move the schema/input to the code node before validating literal matchers. 
+ let mut schema_cursor = schema_cursor.clone(); + let mut input_cursor = input_cursor_at_prefix.clone(); + + if schema_prefix_node.is_some() { + schema_cursor.goto_next_sibling(); + if !input_cursor.goto_next_sibling() { + result.sync_cursor_pos(&schema_cursor, &input_cursor); + return result; } } - // Walk so that we are ON the `code_span` - schema_cursor.goto_next_sibling(); + // Delegate to the literal matcher validator + return LiteralMatcherVsTextualValidator::validate( + &walker.with_cursors(&schema_cursor, &input_cursor), + got_eof, + ); + } + _ => result.add_error(ValidationError::SchemaError(SchemaError::MatcherError { + error, + schema_index: schema_cursor.descendant_index(), + })), + }, + } + + // Validate suffix if there is one + if let Some(schema_suffix_node) = schema_suffix_node { + schema_cursor.goto_next_sibling(); // code_span -> text - // Walk down into the `code_span` and mark its child text as already validated! - { - let mut schema_cursor = schema_cursor.clone(); + // Return early if it is not text + if !is_text_node(&schema_cursor.node()) { + return result; + } - schema_cursor.goto_first_child(); + // Everything that comes after the matcher + let schema_suffix = { + let text_node_after_code_node_str_contents = + get_node_text(&schema_suffix_node, walker.schema_str()); + // All text after the matcher node and maybe the text node right after it ("extras") + get_after_extras(text_node_after_code_node_str_contents).unwrap() + }; - // Only dig in if we won't need to rematch again - if !waiting_at_end(got_eof, walker.input_str(), &input_cursor) { - result.keep_farther_pos(&NodePosPair::from_cursors( - &schema_cursor, - &input_cursor, - )); - } - } - } - None => { - if waiting_at_end(got_eof, input_str, &input_cursor) { - return result; - }; + // Seek forward from the current input byte offset by the length of the suffix + let input_suffix_len = input_cursor.node().byte_range().end - input_byte_offset; + // Check if input_suffix is shorter than 
schema_suffix + let input_suffix = + &walker.input_str()[input_byte_offset..input_cursor.node().byte_range().end]; + + if input_suffix_len < schema_suffix.len() { + if got_eof { + // We've reached EOF, so the input is complete and too short trace!( - "Matcher did not match input string: pattern={}, input='{}'", - matcher.pattern().to_string(), - input_after_prefix + "Suffix mismatch (input too short at EOF): expected '{}', got '{}'", + schema_suffix, input_suffix ); result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor_descendant_index, - expected: matcher.pattern().to_string(), - actual: input_after_prefix.into(), - kind: NodeContentMismatchKind::Matcher, + expected: schema_suffix.into(), + actual: input_suffix.into(), + kind: NodeContentMismatchKind::Suffix, }, )); + } else { + // We haven't reached EOF yet, so partial match is OK + // Check if what we have so far matches + let schema_suffix_partial = &schema_suffix[..input_suffix.len()]; + if schema_suffix_partial != input_suffix { + trace!( + "Suffix partial mismatch: expected '{}', got '{}'", + schema_suffix_partial, input_suffix + ); - return result; - } - } - } - Err(error) => match error { - MatcherError::WasLiteralCode => { - // Move the schema/input to the code node before validating literal matchers. 
- let mut schema_cursor = schema_cursor.clone(); - let mut input_cursor = input_cursor_at_prefix.clone(); - - if schema_prefix_node.is_some() { - schema_cursor.goto_next_sibling(); - if !input_cursor.goto_next_sibling() { - result.sync_cursor_pos(&schema_cursor, &input_cursor); - return result; + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor_descendant_index, + expected: schema_suffix.into(), + actual: input_suffix.into(), + kind: NodeContentMismatchKind::Suffix, + }, + )); + } else { + trace!("Suffix partial match successful, waiting for more input"); } } - - // Delegate to the literal matcher validator - return validate_literal_matcher_vs_textual( - &schema_cursor, - &input_cursor, - schema_str, - input_str, - got_eof, - ); - } - _ => result.add_error(ValidationError::SchemaError(SchemaError::MatcherError { - error, - schema_index: schema_cursor.descendant_index(), - })), - }, - } - - // Validate suffix if there is one - if let Some(schema_suffix_node) = schema_suffix_node { - schema_cursor.goto_next_sibling(); // code_span -> text - - // Return early if it is not text - if !is_text_node(&schema_cursor.node()) { - return result; - } - - // Everything that comes after the matcher - let schema_suffix = { - let text_node_after_code_node_str_contents = - get_node_text(&schema_suffix_node, schema_str); - // All text after the matcher node and maybe the text node right after it ("extras") - get_after_extras(text_node_after_code_node_str_contents).unwrap() - }; - - // Seek forward from the current input byte offset by the length of the suffix - let input_suffix_len = input_cursor.node().byte_range().end - input_byte_offset; - - // Check if input_suffix is shorter than schema_suffix - let input_suffix = &input_str[input_byte_offset..input_cursor.node().byte_range().end]; - - if input_suffix_len < schema_suffix.len() { - if got_eof { - // We've 
reached EOF, so the input is complete and too short + } else if schema_suffix != input_suffix { trace!( - "Suffix mismatch (input too short at EOF): expected '{}', got '{}'", + "Suffix mismatch: expected '{}', got '{}'", schema_suffix, input_suffix ); @@ -391,229 +404,332 @@ fn validate_matcher_vs_text_impl(walker: &ValidatorWalker, got_eof: bool) -> Val }, )); } else { - // We haven't reached EOF yet, so partial match is OK - // Check if what we have so far matches - let schema_suffix_partial = &schema_suffix[..input_suffix.len()]; - if schema_suffix_partial != input_suffix { - trace!( - "Suffix partial mismatch: expected '{}', got '{}'", - schema_suffix_partial, input_suffix + trace!("Suffix matched successfully"); + + // We validated this one! Load the result with the new pos! + result.keep_farther_pos(&NodePosPair::from_cursors( + walker.schema_cursor(), + walker.input_cursor(), + )); + } + } + + result + } +} + +pub(super) struct TextualVsMatcherValidator; + +impl ValidatorImpl for TextualVsMatcherValidator { + fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + let mut result = + ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); + + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); + + #[cfg(feature = "invariant_violations")] + if !is_inline_code_node(&schema_cursor.node()) || !is_inline_code_node(&input_cursor.node()) + { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "literal matcher validation expects code_span nodes" + ); + } + + // Walk into the code node and do regular textual validation. 
+ { + let mut schema_cursor = schema_cursor.clone(); + let mut input_cursor = input_cursor.clone(); + input_cursor.goto_first_child(); + schema_cursor.goto_first_child(); + + #[cfg(feature = "invariant_violations")] + if !is_text_node(&schema_cursor.node()) || !is_text_node(&input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "literal matcher validation expects text children" + ); + } + + let text_result = compare_text_contents( + walker.schema_str(), + walker.input_str(), + &schema_cursor, + &input_cursor, + false, + false, + ); + result.join_other_result(&text_result); + if text_result.has_errors() { + return result; + } + } + + // The schema cursor definitely has a text node after the code node, which + // at minimum contains "!" (which indicates that it is a literal matcher in + // the first place). + #[cfg(feature = "invariant_violations")] + if !schema_cursor.goto_next_sibling() && is_text_node(&schema_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "validate_literal_matcher_vs_text called with a matcher that is not literal. \ + A text node does not follow the schema." + ); + } + + let schema_node_str = get_node_text(&schema_cursor.node(), walker.schema_str()); + + let schema_node_str_has_more_than_extras = schema_node_str.len() > 1; + + // Now see if there is more text than just the "!" in the schema text node. 
+ let schema_text_after_extras = match get_after_extras(schema_node_str) { + Some(text) => text, + None => { + #[cfg(feature = "invariant_violations")] + { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "we should have had extras in the matcher string" ); + } + } + }; + + #[cfg(feature = "invariant_violations")] + if !input_cursor.goto_next_sibling() && schema_node_str_has_more_than_extras { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "at this point we should already have counted the number of nodes, \ + factoring in literal matchers." + ); + } + + if !is_text_node(&input_cursor.node()) { + schema_cursor.goto_next_sibling(); + result.sync_cursor_pos(&schema_cursor, &input_cursor); + return result; + } + let input_text_after_code = get_node_text(&input_cursor.node(), walker.input_str()); + + // Partial match is OK if got_eof is false. + if input_text_after_code.len() < schema_text_after_extras.len() { + if !got_eof { + let schema_text_after_extras_to_compare_against_so_far = + &schema_text_after_extras[..input_text_after_code.len()]; + + // Do the partial comparison. + if schema_text_after_extras_to_compare_against_so_far != input_text_after_code { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: schema_cursor.descendant_index(), - input_index: input_cursor_descendant_index, - expected: schema_suffix.into(), - actual: input_suffix.into(), - kind: NodeContentMismatchKind::Suffix, + input_index: input_cursor.descendant_index(), + expected: schema_text_after_extras_to_compare_against_so_far.into(), + actual: input_text_after_code.into(), + kind: NodeContentMismatchKind::Literal, }, )); } else { - trace!("Suffix partial match successful, waiting for more input"); + // Return early for now. We don't want to move on because we + // will need to redo this part later until we've got EOF. 
+ return result; } + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: schema_text_after_extras.into(), + actual: input_text_after_code.into(), + kind: NodeContentMismatchKind::Literal, + }, + )); } - } else if schema_suffix != input_suffix { - trace!( - "Suffix mismatch: expected '{}', got '{}'", - schema_suffix, input_suffix - ); - + } else if input_text_after_code.len() < schema_text_after_extras.len() { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: schema_cursor.descendant_index(), - input_index: input_cursor_descendant_index, - expected: schema_suffix.into(), - actual: input_suffix.into(), - kind: NodeContentMismatchKind::Suffix, + input_index: input_cursor.descendant_index(), + expected: schema_text_after_extras.into(), + actual: input_text_after_code.into(), + kind: NodeContentMismatchKind::Literal, }, )); } else { - trace!("Suffix matched successfully"); - - // We validated this one! Load the result with the new pos! - result.keep_farther_pos(&NodePosPair::from_cursors(&schema_cursor, &input_cursor)); + // Compare the whole thing. 
+ if input_text_after_code != schema_text_after_extras { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: schema_text_after_extras.into(), + actual: input_text_after_code.into(), + kind: NodeContentMismatchKind::Literal, + }, + )); + } } - } - - result -} -fn at_text_and_next_at_literal_matcher( - schema_cursor: &TreeCursor, - schema_str: &str, -) -> Result, ValidationError> { - if !is_text_node(&schema_cursor.node()) { - return Ok(None); - } - - let mut next_cursor = schema_cursor.clone(); - if !next_cursor.goto_next_sibling() || !is_inline_code_node(&next_cursor.node()) { - return Ok(None); - } + result.sync_cursor_pos(&schema_cursor, &input_cursor); - match Matcher::try_from_schema_cursor(&next_cursor, schema_str) { - Ok(_) => Ok(Some(false)), - Err(MatcherError::WasLiteralCode) => Ok(Some(true)), - Err(error) => Err(ValidationError::SchemaError(SchemaError::MatcherError { - error, - schema_index: schema_cursor.descendant_index(), - })), + result } } -pub(super) fn validate_literal_matcher_vs_textual( - schema_cursor: &TreeCursor, - input_cursor: &TreeCursor, - schema_str: &str, - input_str: &str, - got_eof: bool, -) -> ValidationResult { - let mut result = ValidationResult::from_cursors(schema_cursor, input_cursor); - - let mut schema_cursor = schema_cursor.clone(); - let mut input_cursor = input_cursor.clone(); - - #[cfg(feature = "invariant_violations")] - if !is_inline_code_node(&schema_cursor.node()) || !is_inline_code_node(&input_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "literal matcher validation expects code_span nodes" - ); - } +pub(super) struct LiteralMatcherVsTextualValidator; + +impl ValidatorImpl for LiteralMatcherVsTextualValidator { + fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + let schema_cursor: 
&TreeCursor = walker.schema_cursor(); + let input_cursor: &TreeCursor = walker.input_cursor(); + let mut result = ValidationResult::from_cursors(schema_cursor, input_cursor); - // Walk into the code node and do regular textual validation. - { let mut schema_cursor = schema_cursor.clone(); let mut input_cursor = input_cursor.clone(); - input_cursor.goto_first_child(); - schema_cursor.goto_first_child(); #[cfg(feature = "invariant_violations")] - if !is_text_node(&schema_cursor.node()) || !is_text_node(&input_cursor.node()) { + if !is_inline_code_node(&schema_cursor.node()) || !is_inline_code_node(&input_cursor.node()) + { invariant_violation!( result, &schema_cursor, &input_cursor, - "literal matcher validation expects text children" + "literal matcher validation expects code_span nodes" ); } - let text_result = compare_text_contents( - schema_str, - input_str, - &schema_cursor, - &input_cursor, - false, - false, - ); - result.join_other_result(&text_result); - if text_result.has_errors() { - return result; - } - } - - // The schema cursor definitely has a text node after the code node, which - // at minimum contains "!" (which indicates that it is a literal matcher in - // the first place). - #[cfg(feature = "invariant_violations")] - if !schema_cursor.goto_next_sibling() && is_text_node(&schema_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "validate_literal_matcher_vs_text called with a matcher that is not literal. \ - A text node does not follow the schema." - ); - } + // Walk into the code node and do regular textual validation. + { + let mut schema_cursor = schema_cursor.clone(); + let mut input_cursor = input_cursor.clone(); + input_cursor.goto_first_child(); + schema_cursor.goto_first_child(); - let schema_node_str = get_node_text(&schema_cursor.node(), schema_str); - - let schema_node_str_has_more_than_extras = schema_node_str.len() > 1; - - // Now see if there is more text than just the "!" in the schema text node. 
- let schema_text_after_extras = match get_after_extras(schema_node_str) { - Some(text) => text, - None => { #[cfg(feature = "invariant_violations")] - { + if !is_text_node(&schema_cursor.node()) || !is_text_node(&input_cursor.node()) { invariant_violation!( result, &schema_cursor, &input_cursor, - "we should have had extras in the matcher string" + "literal matcher validation expects text children" ); } + + let text_result = compare_text_contents( + walker.schema_str(), + walker.input_str(), + &schema_cursor, + &input_cursor, + false, + false, + ); + result.join_other_result(&text_result); + if text_result.has_errors() { + return result; + } } - }; - #[cfg(feature = "invariant_violations")] - if !input_cursor.goto_next_sibling() && schema_node_str_has_more_than_extras { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "at this point we should already have counted the number of nodes, \ + // The schema cursor definitely has a text node after the code node, which + // at minimum contains "!" (which indicates that it is a literal matcher in + // the first place). + #[cfg(feature = "invariant_violations")] + if !schema_cursor.goto_next_sibling() && is_text_node(&schema_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "validate_literal_matcher_vs_text called with a matcher that is not literal. \ + A text node does not follow the schema." + ); + } + + let schema_node_str = get_node_text(&schema_cursor.node(), walker.schema_str()); + + let schema_node_str_has_more_than_extras = schema_node_str.len() > 1; + + // Now see if there is more text than just the "!" in the schema text node. 
+ let schema_text_after_extras = match get_after_extras(schema_node_str) { + Some(text) => text, + None => { + #[cfg(feature = "invariant_violations")] + { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "we should have had extras in the matcher string" + ); + } + } + }; + + #[cfg(feature = "invariant_violations")] + if !input_cursor.goto_next_sibling() && schema_node_str_has_more_than_extras { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "at this point we should already have counted the number of nodes, \ factoring in literal matchers." - ); - } + ); + } - if !is_text_node(&input_cursor.node()) { - schema_cursor.goto_next_sibling(); - result.sync_cursor_pos(&schema_cursor, &input_cursor); - return result; - } + if !is_text_node(&input_cursor.node()) { + schema_cursor.goto_next_sibling(); + result.sync_cursor_pos(&schema_cursor, &input_cursor); + return result; + } - let input_text_after_code = input_cursor.node().utf8_text(input_str.as_bytes()).unwrap(); + let input_text_after_code = input_cursor + .node() + .utf8_text(walker.input_str().as_bytes()) + .unwrap(); - // Partial match is OK if got_eof is false. - if input_text_after_code.len() < schema_text_after_extras.len() { - if !got_eof { - let schema_text_after_extras_to_compare_against_so_far = - &schema_text_after_extras[..input_text_after_code.len()]; + // Partial match is OK if got_eof is false. + if input_text_after_code.len() < schema_text_after_extras.len() { + if !got_eof { + let schema_text_after_extras_to_compare_against_so_far = + &schema_text_after_extras[..input_text_after_code.len()]; - // Do the partial comparison. - if schema_text_after_extras_to_compare_against_so_far != input_text_after_code { + // Do the partial comparison. 
+ if schema_text_after_extras_to_compare_against_so_far != input_text_after_code { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: schema_text_after_extras_to_compare_against_so_far.into(), + actual: input_text_after_code.into(), + kind: NodeContentMismatchKind::Literal, + }, + )); + } else { + // Return early for now. We don't want to move on because we + // will need to redo this part later until we've got EOF. + return result; + } + } else { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: schema_text_after_extras_to_compare_against_so_far.into(), + expected: schema_text_after_extras.into(), actual: input_text_after_code.into(), kind: NodeContentMismatchKind::Literal, }, )); - } else { - // Return early for now. We don't want to move on because we - // will need to redo this part later until we've got EOF. - return result; } - } else { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::NodeContentMismatch { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - expected: schema_text_after_extras.into(), - actual: input_text_after_code.into(), - kind: NodeContentMismatchKind::Literal, - }, - )); - } - } else if input_text_after_code.len() < schema_text_after_extras.len() { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::NodeContentMismatch { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - expected: schema_text_after_extras.into(), - actual: input_text_after_code.into(), - kind: NodeContentMismatchKind::Literal, - }, - )); - } else { - // Compare the whole thing. 
- if input_text_after_code != schema_text_after_extras { + } else if input_text_after_code.len() < schema_text_after_extras.len() { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: schema_cursor.descendant_index(), @@ -623,33 +739,65 @@ pub(super) fn validate_literal_matcher_vs_textual( kind: NodeContentMismatchKind::Literal, }, )); + } else { + // Compare the whole thing. + if input_text_after_code != schema_text_after_extras { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: schema_text_after_extras.into(), + actual: input_text_after_code.into(), + kind: NodeContentMismatchKind::Literal, + }, + )); + } } + + result.sync_cursor_pos(&schema_cursor, &input_cursor); + + result } +} - result.sync_cursor_pos(&schema_cursor, &input_cursor); +fn at_text_and_next_at_literal_matcher( + schema_cursor: &TreeCursor, + schema_str: &str, +) -> Result, ValidationError> { + if !is_text_node(&schema_cursor.node()) { + return Ok(None); + } - result + let mut next_cursor = schema_cursor.clone(); + if !next_cursor.goto_next_sibling() || !is_inline_code_node(&next_cursor.node()) { + return Ok(None); + } + + match Matcher::try_from_schema_cursor(&next_cursor, schema_str) { + Ok(_) => Ok(Some(false)), + Err(MatcherError::WasLiteralCode) => Ok(Some(true)), + Err(error) => Err(ValidationError::SchemaError(SchemaError::MatcherError { + error, + schema_index: schema_cursor.descendant_index(), + })), + } } #[cfg(test)] mod tests { use serde_json::json; - use crate::mdschema::validator::errors::{ - NodeContentMismatchKind, SchemaViolationError, ValidationError, - }; - use crate::mdschema::validator::node_pos_pair::NodePosPair; - use crate::mdschema::validator::node_walker::validators::test_utils::ValidatorTester; - use crate::mdschema::validator::node_walker::validators::{ - 
Validator, textual::TextualVsTextualValidator, - }; - use crate::mdschema::validator::ts_types::{ - both_are_inline_code, both_are_paragraphs, is_paragraph_node, - }; - use crate::mdschema::validator::ts_utils::parse_markdown; - use crate::mdschema::validator::validator_walker::ValidatorWalker; - + use super::super::test_utils::ValidatorTester; + use super::super::textual::TextualVsTextualValidator; use super::{LiteralMatcherVsTextualValidator, MatcherVsTextValidator}; + use crate::mdschema::validator::node_walker::validators::Validator; + use crate::mdschema::validator::{ + errors::{NodeContentMismatchKind, SchemaViolationError, ValidationError}, + node_pos_pair::NodePosPair, + ts_types::*, + ts_utils::parse_markdown, + validator_walker::ValidatorWalker, + }; #[test] fn test_validate_matcher_vs_text_partial() { @@ -857,8 +1005,8 @@ mod tests { }) => { assert_eq!(actual, "bad p"); assert_eq!(expected, "good prefix "); - assert_eq!(*input_index, 2); - assert_eq!(*schema_index, 2); + assert_eq!(input_index, &2); + assert_eq!(schema_index, &2); } _ => panic!( "Expected a prefix mismatch error, got: {:?}", diff --git a/src/mdschema/validator/node_walker/validators/mod.rs b/src/mdschema/validator/node_walker/validators/mod.rs index a350b2c..2f400e4 100644 --- a/src/mdschema/validator/node_walker/validators/mod.rs +++ b/src/mdschema/validator/node_walker/validators/mod.rs @@ -1,3 +1,20 @@ +//! Node-walker validators. +//! +//! Types: +//! - `ValidatorImpl`: core validator implementation trait. +//! - `Validator`: wrapper used to get tracing on every validator call. +//! +//! Validator implementations: +//! - `nodes::NodeVsNodeValidator`: dispatches between textual, code, list, table, heading, quote, and link validators. +//! - `textual::TextualVsTextualValidator`: compares textual and inline code nodes, delegating matcher work as needed. +//! - `matchers::MatcherVsTextValidator`: handles schema nodes that embed matcher syntax inside text or code spans. +//! 
- `code::CodeVsCodeValidator`: validates fenced code blocks (matcher-based language, capture support). +//! - `headings::HeadingVsHeadingValidator`: ensures heading kinds match and defers to textual container comparison. +//! - `quotes::QuoteVsQuoteValidator`: validates block quotes by walking their contents with textual container logic. +//! - `links::LinkVsLinkValidator`: checks link and image destinations plus alt text, with matcher coverage. +//! - `tables::TableVsTableValidator`: walks table rows/cells and hands off textual cells to textual container validation. +//! - `lists::ListVsListValidator`: aligns schema and input list items, handling nested structures and matcher-aware text. +//! - `containers::TextualContainerVsTextualContainerValidator`: walks inline container nodes and compares literal/matcher-driven text. #[allow(dead_code)] use tracing::instrument; @@ -6,6 +23,7 @@ use crate::mdschema::validator::{ }; pub(super) mod code; +pub(super) mod containers; pub(super) mod headings; pub(super) mod links; pub(super) mod lists; @@ -14,7 +32,6 @@ pub(crate) mod nodes; pub(super) mod quotes; pub(super) mod tables; pub(super) mod textual; -pub(super) mod textual_container; pub trait ValidatorImpl { fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validator/node_walker/validators/nodes.rs index 2b0fc6c..b8a6a01 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs +++ b/src/mdschema/validator/node_walker/validators/nodes.rs @@ -1,3 +1,8 @@ +//! Node dispatch validator. +//! +//! Types: +//! - `NodeVsNodeValidator`: routes node-vs-node checks to the specific validator +//! based on node kinds and performs shared structural checks. 
use log::trace; use crate::mdschema::validator::errors::{SchemaError, ValidationError}; @@ -5,19 +10,15 @@ use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::helpers::check_repeating_matchers::check_repeating_matchers; use crate::mdschema::validator::node_walker::validators::code::CodeVsCodeValidator; +use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; use crate::mdschema::validator::node_walker::validators::headings::HeadingVsHeadingValidator; use crate::mdschema::validator::node_walker::validators::links::LinkVsLinkValidator; use crate::mdschema::validator::node_walker::validators::lists::ListVsListValidator; use crate::mdschema::validator::node_walker::validators::quotes::QuoteVsQuoteValidator; use crate::mdschema::validator::node_walker::validators::tables::TableVsTableValidator; use crate::mdschema::validator::node_walker::validators::textual::TextualVsTextualValidator; -use crate::mdschema::validator::node_walker::validators::textual_container::TextualContainerVsTextualContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; -use crate::mdschema::validator::ts_types::{ - both_are_codeblocks, both_are_headings, both_are_image_nodes, both_are_link_nodes, - both_are_list_nodes, both_are_matching_top_level_nodes, both_are_quotes, both_are_rulers, - both_are_tables, both_are_textual_containers, both_are_textual_nodes, -}; +use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::waiting_at_end; use crate::mdschema::validator::validator_walker::ValidatorWalker; use crate::{compare_node_children_lengths_check, compare_node_kinds_check, invariant_violation}; @@ -40,9 +41,6 @@ impl ValidatorImpl for NodeVsNodeValidator { fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { 
let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); - let schema_str = walker.schema_str(); - let input_str = walker.input_str(); - let schema_node = walker.schema_cursor().node(); let input_node = walker.input_cursor().node(); @@ -85,7 +83,8 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida // the containers to contain repeating matchers since the same utility // is used for list validation. - if let Some(repeating_matcher_index) = check_repeating_matchers(&schema_cursor, schema_str) + if let Some(repeating_matcher_index) = + check_repeating_matchers(&schema_cursor, walker.schema_str()) { result.add_error(ValidationError::SchemaError( SchemaError::RepeatingMatcherInTextContainer { @@ -169,7 +168,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida result.join_other_result(&new_result); result.sync_cursor_pos(&schema_cursor, &input_cursor); } - (true, false) if waiting_at_end(got_eof, input_str, &input_cursor) => { + (true, false) if waiting_at_end(got_eof, walker.input_str(), &input_cursor) => { // Stop for now. We will revalidate from here later. result.set_farthest_reached_pos(parent_pos); return result; @@ -194,7 +193,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida result.join_other_result(&new_result); result.sync_cursor_pos(&schema_cursor, &input_cursor); } - (true, false) if waiting_at_end(got_eof, input_str, &input_cursor) => { + (true, false) if waiting_at_end(got_eof, walker.input_str(), &input_cursor) => { // Stop for now. We will revalidate from here later. 
result.set_farthest_reached_pos(parent_pos); return result; @@ -208,7 +207,13 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida return result; } else { // otherwise, at the minimum check the type - compare_node_kinds_check!(schema_cursor, input_cursor, schema_str, input_str, result); + compare_node_kinds_check!( + schema_cursor, + input_cursor, + walker.schema_str(), + walker.input_str(), + result + ); if result.has_errors() { return result; @@ -232,10 +237,11 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida mod tests { use serde_json::json; + use super::super::test_utils::ValidatorTester; + use super::NodeVsNodeValidator; use crate::mdschema::validator::{ errors::{ChildrenCount, SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, - node_walker::validators::{nodes::NodeVsNodeValidator, test_utils::ValidatorTester}, }; #[test] @@ -429,8 +435,8 @@ mod tests { SchemaViolationError::ChildrenLengthMismatch { expected, .. }, ) => { assert_eq!( - *expected, - ChildrenCount::SpecificCount(0), + expected, + &ChildrenCount::SpecificCount(0), "expected should be 0 for empty schema" ); } diff --git a/src/mdschema/validator/node_walker/validators/quotes.rs b/src/mdschema/validator/node_walker/validators/quotes.rs index 7001ff7..bfec545 100644 --- a/src/mdschema/validator/node_walker/validators/quotes.rs +++ b/src/mdschema/validator/node_walker/validators/quotes.rs @@ -1,5 +1,10 @@ +//! Block quote validator for node-walker comparisons. +//! +//! Types: +//! - `QuoteVsQuoteValidator`: verifies quote node kinds and delegates content +//! validation to textual containers. 
use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::validators::textual_container::TextualContainerVsTextualContainerValidator; +use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; use crate::mdschema::validator::validator_walker::ValidatorWalker; use crate::{compare_node_kinds_check, invariant_violation}; @@ -64,10 +69,11 @@ impl ValidatorImpl for QuoteVsQuoteValidator { #[cfg(test)] mod tests { + use super::super::test_utils::ValidatorTester; + use super::QuoteVsQuoteValidator; use crate::mdschema::validator::{ errors::{NodeContentMismatchKind, SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, - node_walker::validators::{quotes::QuoteVsQuoteValidator, test_utils::ValidatorTester}, }; #[test] diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index f631bff..97fcb89 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -1,3 +1,8 @@ +//! Table validator for node-walker comparisons. +//! +//! Types: +//! - `TableVsTableValidator`: validates table structure (rows, headers, cells) +//! and delegates cell content checks to textual container validation. 
// use std::os::raw::c_short; // use std::rc::Rc; // use thiserror::Error; @@ -8,11 +13,10 @@ use crate::mdschema::validator::errors::{ }; use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::validators::textual_container::TextualContainerVsTextualContainerValidator; +use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; #[cfg(feature = "invariant_violations")] -use crate::mdschema::validator::ts_types::{both_are_table_cells, both_are_table_headers}; -use crate::mdschema::validator::ts_types::{both_are_table_delimiter_rows, both_are_tables}; +use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::waiting_at_end; use crate::mdschema::validator::validator_walker::ValidatorWalker; use tree_sitter::TreeCursor; @@ -256,11 +260,11 @@ fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { #[cfg(test)] mod tests { + use super::super::test_utils::ValidatorTester; use super::*; use crate::mdschema::validator::{ errors::{NodeContentMismatchKind, SchemaViolationError, ValidationError}, - node_walker::validators::test_utils::ValidatorTester, - ts_types::both_are_tables, + node_pos_pair::NodePosPair, }; use serde_json::json; diff --git a/src/mdschema/validator/node_walker/validators/textual.rs b/src/mdschema/validator/node_walker/validators/textual.rs index 43b9ad6..c7f134b 100644 --- a/src/mdschema/validator/node_walker/validators/textual.rs +++ b/src/mdschema/validator/node_walker/validators/textual.rs @@ -1,3 +1,8 @@ +//! Textual node validator for node-walker comparisons. +//! +//! Types: +//! - `TextualVsTextualValidator`: compares text and inline code nodes, delegating +//! to matcher validation when schema content contains matcher syntax. 
use tracing::instrument; use tree_sitter::TreeCursor; @@ -9,7 +14,7 @@ use crate::mdschema::validator::node_walker::validators::matchers::MatcherVsText use crate::mdschema::validator::validator_walker::ValidatorWalker; use crate::mdschema::validator::{ node_walker::{ValidationResult, validators::Validator}, - ts_types::{both_are_textual_nodes, is_inline_code_node, is_text_node}, + ts_types::*, ts_utils::{get_next_node, waiting_at_end}, }; @@ -108,12 +113,9 @@ pub(super) fn validate_textual_vs_textual_direct( mod tests { use serde_json::json; + use super::super::test_utils::ValidatorTester; use super::TextualVsTextualValidator; - use crate::mdschema::validator::{ - node_pos_pair::NodePosPair, - node_walker::validators::test_utils::ValidatorTester, - ts_types::{both_are_inline_code, both_are_text_nodes}, - }; + use crate::mdschema::validator::{node_pos_pair::NodePosPair, ts_types::*}; #[test] fn test_validate_textual_vs_textual_with_literal_matcher() { diff --git a/src/mdschema/validator/node_walker/validators/textual_container.rs b/src/mdschema/validator/node_walker/validators/textual_container.rs deleted file mode 100644 index baf3751..0000000 --- a/src/mdschema/validator/node_walker/validators/textual_container.rs +++ /dev/null @@ -1,383 +0,0 @@ -use tree_sitter::TreeCursor; - -use crate::invariant_violation; -use crate::mdschema::validator::validator_walker::ValidatorWalker; -use crate::mdschema::validator::{ - errors::*, - matcher::{ - matcher::{Matcher, MatcherError}, - matcher_extras::get_all_extras, - }, - node_walker::{ - ValidationResult, - helpers::expected_input_nodes::expected_input_nodes, - validators::{ - Validator, ValidatorImpl, links::LinkVsLinkValidator, - textual::TextualVsTextualValidator, - }, - }, - ts_types::{ - both_are_image_nodes, both_are_link_nodes, both_are_textual_containers, is_inline_code_node, - is_text_node, - }, - ts_utils::{count_siblings, get_next_node, get_node_text}, -}; - -/// Validate a textual region of input against a 
textual region of schema. -/// -/// Takes two cursors pointing at text containers in the schema and input, and -/// validates them. The input text container may have a single matcher, and -/// potentially many other types of nodes. For example: -/// -/// Schema: -/// ```md -/// **Test** _*test*_ `test///`! `match:/test/` *foo*. -/// ``` -/// -/// Input: -/// ```md -/// **Test** _*test*_ `test///`! test *foo*. -/// -/// # Algorithm -/// -/// This works by: -/// -/// 1. Count the number of top level matchers in the schema. Find the first -/// valid one. Then keep going, but if there are more than 1, error. -/// 2. Count the number of nodes for both the input and schema using special -/// utility that takes into account literal matchers. -/// 3. Walk the input and schema cursors at the same rate, and walk down ane -/// recurse, which takes us to our base case of directly validating the contents -/// and kind of the node. If the node we are at is a code node, look at it and -/// the next node. If the two nodes correspond to a literal matcher: -/// - Match the inside of the matcher against the corresponding code node in the input. -/// - Then if there is additional text in the subsequent text node after the code node, -/// check that there is a text node in the input, maybe error, and if there is, -/// validate that the contents of the rest of it is the same. -/// - Then move to the next node pair, hopping two nodes at once for the schema node. 
-pub(super) struct TextualContainerVsTextualContainerValidator; - -impl ValidatorImpl for TextualContainerVsTextualContainerValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - validate_textual_container_vs_textual_container_impl(walker, got_eof) - } -} - -fn validate_textual_container_vs_textual_container_impl( - walker: &ValidatorWalker, - got_eof: bool, -) -> ValidationResult { - let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); - - let schema_str = walker.schema_str(); - - let mut schema_cursor = walker.schema_cursor().clone(); - let mut input_cursor = walker.input_cursor().clone(); - - #[cfg(feature = "invariant_violations")] - if !both_are_textual_containers(&schema_cursor.node(), &input_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "expected textual container nodes" - ); - } - - match count_non_literal_matchers_in_children(&schema_cursor, schema_str) { - Ok(non_repeating_matchers_count) if non_repeating_matchers_count > 1 && got_eof => result - .add_error(ValidationError::SchemaError( - SchemaError::MultipleMatchersInNodeChildren { - schema_index: schema_cursor.descendant_index(), - received: non_repeating_matchers_count, - }, - )), - Ok(_) => { - // Exactly one non repeating matcher is OK! 
- } - Err(err) => { - result.add_error(err); - - return result; - } - } - - let (expected_input_node_count, actual_input_node_count) = { - let mut schema_cursor = schema_cursor.clone(); - schema_cursor.goto_first_child(); - - let mut input_cursor = input_cursor.clone(); - input_cursor.goto_first_child(); - - let expected_input_node_count = match expected_input_nodes(&schema_cursor, schema_str) { - Ok(expected_input_node_count) => expected_input_node_count, - Err(error) => { - result.add_error(error); - return result; - } - }; - - let actual_input_node_count = count_siblings(&input_cursor) + 1; // including the node we are currently at - - (expected_input_node_count, actual_input_node_count) - }; - - if (actual_input_node_count != expected_input_node_count) && got_eof { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - expected: ChildrenCount::from_specific(expected_input_node_count), - actual: actual_input_node_count, - }, - )); - } - - // Go from the container to the first child in the container, and then - // iterate over the siblings at the same rate. 
- input_cursor.goto_first_child(); - schema_cursor.goto_first_child(); - - loop { - let pair_result = if both_are_link_nodes(&schema_cursor.node(), &input_cursor.node()) - || both_are_image_nodes(&schema_cursor.node(), &input_cursor.node()) - { - LinkVsLinkValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ) - } else { - let new_result = TextualVsTextualValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); - new_result.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); - new_result - }; - - result.join_other_result(&pair_result); - - if !schema_cursor.goto_next_sibling() || !input_cursor.goto_next_sibling() { - break; - } - } - - result -} - -/// Count the number of matchers, starting at some cursor pointing to a textual -/// container, and iterating through all of its children. -/// -/// Returns the number of matchers, or a `ValidationError` that is probably a -/// `MatcherError` due to failing to construct a matcher given a code node that -/// is not marked as literal. -fn count_non_literal_matchers_in_children( - schema_cursor: &TreeCursor, - schema_str: &str, -) -> Result { - let mut count = 0; - let mut cursor = schema_cursor.clone(); - - cursor.goto_first_child(); - - loop { - if !is_inline_code_node(&cursor.node()) { - if !cursor.goto_next_sibling() { - break; - } else { - continue; - } - } - - // If the following node is a text node, then it may have extras, so grab them. 
- let extras_str = match get_next_node(&cursor) - .filter(|n| is_text_node(n)) - .map(|next_node| { - let next_node_str = get_node_text(&next_node, schema_str); - get_all_extras(next_node_str) - }) { - Some(Ok(extras)) => Some(extras), - Some(Err(error)) => { - return Err(ValidationError::SchemaError(SchemaError::MatcherError { - error: error.into(), - schema_index: schema_cursor.descendant_index(), - })); - } - None => None, - }; - - let pattern_str = get_node_text(&cursor.node(), schema_str); - - match Matcher::try_from_pattern_and_suffix_str(pattern_str, extras_str) { - Ok(_) => count += 1, - Err(MatcherError::WasLiteralCode) => { - // Don't count it, but this is an OK error - } - Err(err) => { - return Err(ValidationError::SchemaError(SchemaError::MatcherError { - error: err, - schema_index: cursor.descendant_index(), - })); - } - } - - if !cursor.goto_next_sibling() { - break; - } - } - - Ok(count) -} - -#[cfg(test)] -mod tests { - use serde_json::json; - - use super::TextualContainerVsTextualContainerValidator; - use crate::mdschema::validator::{ - errors::{SchemaError, ValidationError}, - matcher::matcher::MatcherError, - node_pos_pair::NodePosPair, - node_walker::validators::{ - test_utils::ValidatorTester, textual_container::count_non_literal_matchers_in_children, - }, - ts_types::{both_are_textual_containers, is_heading_content_node}, - ts_utils::parse_markdown, - }; - - #[test] - fn test_count_non_literal_matchers_in_children_invalid_matcher() { - let schema_str = "test `_*test*_`"; - let schema_tree = parse_markdown(schema_str).unwrap(); - let mut schema_cursor = schema_tree.walk(); - schema_cursor.goto_first_child(); - schema_cursor.goto_first_child(); - - match count_non_literal_matchers_in_children(&schema_cursor, schema_str).unwrap_err() { - ValidationError::SchemaError(SchemaError::MatcherError { - error, - schema_index, - }) => { - assert_eq!(schema_index, 3); // the index of the code_span - match error { - 
MatcherError::MatcherInteriorRegexInvalid(_) => {} - _ => panic!("Expected MatcherInteriorRegexInvalid error"), - } - } - _ => panic!("Expected InvalidMatcher error"), - } - } - - #[test] - fn test_count_non_literal_matchers_in_children_only_literal_matcher() { - let schema_str = "test `_*test*_`! `test:/test/`"; - let schema_tree = parse_markdown(schema_str).unwrap(); - let mut schema_cursor = schema_tree.walk(); - schema_cursor.goto_first_child(); - schema_cursor.goto_first_child(); - - assert_eq!( - count_non_literal_matchers_in_children(&schema_cursor, schema_str).unwrap(), - 1 // one is literal - ); - } - - #[test] - fn test_count_non_literal_matchers_in_children_no_matchers() { - let schema_str = "test *foo* _bar_"; - let schema_tree = parse_markdown(schema_str).unwrap(); - let mut schema_cursor = schema_tree.walk(); - schema_cursor.goto_first_child(); - - assert_eq!( - count_non_literal_matchers_in_children(&schema_cursor, schema_str).unwrap(), - 0 - ); - } - - #[test] - fn test_validate_textual_container_vs_textual_container_with_content_and_link() { - let schema_str = "# Test Wolf [hi](https://example.com)"; - let input_str = "# Test Wolf [hi](https://foobar.com)"; - - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - .goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) - .validate_complete(); - - assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(9, 9)); - assert!(!result.errors().is_empty()); - assert_eq!(result.value(), &json!({})); - } - - #[test] - fn test_validate_textual_container_vs_textual_container_header_content() { - let schema_str = "# Test Wolf"; - let input_str = "# Test Wolf"; - - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - 
.goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) - .validate_complete(); - - assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(4, 4)); - assert_eq!(result.errors(), &vec![]); - assert_eq!(result.value(), &json!({})); - } - - #[test] - fn test_validate_textual_container_vs_textual_container_header_content_and_matcher() { - let schema_str = "# Test `name:/[a-zA-Z]+/`"; - let input_str = "# Test Wolf"; - - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - .goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) - .validate_complete(); - - assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(6, 4)); - assert_eq!(result.errors(), &vec![]); - assert_eq!(result.value(), &json!({"name": "Wolf"})); - } - - #[test] - fn test_validate_textual_container_vs_textual_container_link_then_bad_node() { - let schema_str = "# Heading [test]({a:/a/}) `b:/b/`"; - let input_str = "# Heading [test](a) b"; - - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - .goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(both_are_textual_containers(s, i))) - .validate_complete(); - - let errors = result.errors().to_vec(); - let value = result.value().clone(); - - assert_eq!( - *result.farthest_reached_pos(), - NodePosPair::from_pos(12, 10) - ); - assert_eq!(errors, vec![]); - assert_eq!(value, json!({"a": "a", "b": "b"})); - } -} diff --git a/src/mdschema/validator/ts_utils.rs b/src/mdschema/validator/ts_utils.rs index 4045bd9..a120aa6 100644 --- a/src/mdschema/validator/ts_utils.rs +++ b/src/mdschema/validator/ts_utils.rs @@ -4,9 +4,9 @@ use crate::invariant_violation; use tree_sitter::{Node, Parser, Tree, TreeCursor}; use 
tree_sitter_markdown::language; -use crate::mdschema::validator::{errors::ValidationError, validator::ValidatorState}; #[cfg(feature = "invariant_violations")] -use crate::mdschema::validator::ts_types::is_marker_node; +use crate::mdschema::validator::ts_types::*; +use crate::mdschema::validator::{errors::ValidationError, validator::ValidatorState}; use regex::Regex; use std::sync::LazyLock; @@ -342,8 +342,10 @@ pub fn validate_str(schema: &str, input: &str) -> (serde_json::Value, Vec Date: Fri, 9 Jan 2026 14:10:31 -0500 Subject: [PATCH 02/33] remove unused import --- .../validator/node_walker/validators/containers.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index 62e5a9a..53f6b1e 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -325,11 +325,8 @@ mod tests { use super::{TextualContainerVsTextualContainerValidator, is_repeated_matcher_paragraph}; use crate::mdschema::validator::{ - errors::{SchemaError, ValidationError}, - node_pos_pair::NodePosPair, - node_walker::validators::test_utils::ValidatorTester, - ts_types::*, - ts_utils::parse_markdown, + node_pos_pair::NodePosPair, node_walker::validators::test_utils::ValidatorTester, + ts_types::*, ts_utils::parse_markdown, }; #[test] From 758526a0e7064c89733a45307926df76f16b5c11 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 14:44:34 -0500 Subject: [PATCH 03/33] add docs and more progress --- .../src/content/docs/matchers/02-matchers.mdx | 32 +++- examples/cli/input.md | 6 +- examples/cli/schema.md | 2 +- src/mdschema/validator/errors.rs | 179 +++++++++++------- .../helpers/node_children_lengths.rs | 6 +- .../node_walker/validators/containers.rs | 70 +++++-- .../validator/node_walker/validators/lists.rs | 27 ++- 
.../validator/node_walker/validators/nodes.rs | 8 +- src/mdschema/validator/validator.rs | 6 +- tests/misc.rs | 6 +- tests/rulers.rs | 4 +- 11 files changed, 219 insertions(+), 127 deletions(-) diff --git a/docs/src/content/docs/matchers/02-matchers.mdx b/docs/src/content/docs/matchers/02-matchers.mdx index c09e80b..df99f25 100644 --- a/docs/src/content/docs/matchers/02-matchers.mdx +++ b/docs/src/content/docs/matchers/02-matchers.mdx @@ -9,7 +9,7 @@ import TODO from "../../../components/TODO.astro"; Matchers allow you to validate dynamic content using regular expressions. A matcher is defined using inline code syntax with a specific format: `` `label:/pattern/` ``. -## Basic Syntax +# Syntax The basic matcher format is: @@ -40,7 +40,7 @@ The pattern is automatically anchored to the start (as if prefixed with `^`), so -## Matchers with Surrounding Text +## With Surrounding Text Matchers can be combined with literal text as prefixes and suffixes: @@ -116,7 +116,31 @@ Right now, you can only have one matcher per paragraph (collection of spanning e valid={false} /> -## Literal Code Blocks +# Repeating Paragraphs + + + +You can validate multiple paragraph nodes into an array by using a repeated matcher. The repeated matcher syntax will show up again for lists, it looks like `` {min,max} ``, where `min` and `max` are optional. + + + + + +The matched content is passed to the executable: +- As arguments (`$0`, `$1`, etc.) 
+- Via stdin (if command reads from stdin) + +# Literal Code Blocks To match inline code blocks literally instead of treating them as matchers, add `!` after the code block: @@ -150,7 +174,7 @@ Use `!!` to match a literal exclamation mark after code: valid={true} /> -## Execution Validation +# Execution Validation diff --git a/examples/cli/input.md b/examples/cli/input.md index b33c560..839fe4a 100644 --- a/examples/cli/input.md +++ b/examples/cli/input.md @@ -1 +1,5 @@ -test test +test + +test + +test diff --git a/examples/cli/schema.md b/examples/cli/schema.md index ec476a0..442d3b8 100644 --- a/examples/cli/schema.md +++ b/examples/cli/schema.md @@ -1 +1 @@ -`test:/test/`{,} test +`test:/test/`{,} diff --git a/src/mdschema/validator/errors.rs b/src/mdschema/validator/errors.rs index 7983052..4e0fd7d 100644 --- a/src/mdschema/validator/errors.rs +++ b/src/mdschema/validator/errors.rs @@ -337,6 +337,16 @@ pub enum SchemaViolationError { kind: NodeContentMismatchKind, }, + /// Not enough nodes for a repeating paragraph. + NotEnoughNodesForRepeatingParagraph { + schema_index: usize, + input_index: usize, + /// Expected number of children from schema. + expected: ChildrenLengthRange, + /// Actual number of children in input. + actual: usize, + }, + /// Matcher appears in list context without repetition syntax. /// /// List nodes require matchers to use `{min,max}` syntax. @@ -350,7 +360,7 @@ pub enum SchemaViolationError { schema_index: usize, input_index: usize, /// Expected number of children from schema. - expected: ChildrenCount, + expected: ChildrenLengthRange, // min, max /// Actual number of children in input. 
actual: usize, }, @@ -383,6 +393,37 @@ pub enum SchemaViolationError { }, } +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct ChildrenLengthRange(pub usize, pub usize); + +impl From<(usize, usize)> for ChildrenLengthRange { + fn from((min, max): (usize, usize)) -> Self { + ChildrenLengthRange(min, max) + } +} + +impl From for ChildrenLengthRange { + fn from(min: usize) -> Self { + ChildrenLengthRange(min, min) + } +} + +impl PartialEq for ChildrenLengthRange { + fn eq(&self, other: &usize) -> bool { + self.0 == *other && self.1 == *other + } +} + +impl std::fmt::Display for ChildrenLengthRange { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let ChildrenLengthRange(min, max) = self; + match (min, max) { + (min, max) if min == max => write!(f, "exactly {}", min), + (min, max) => write!(f, "between {} and {}", min, max), + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum MalformedStructureKind { MissingListItemContent, @@ -406,6 +447,11 @@ impl fmt::Display for SchemaViolationError { } => { write!(f, "Expected {} '{}', found '{}'", kind, expected, actual) } + SchemaViolationError::NotEnoughNodesForRepeatingParagraph { + expected, actual, .. + } => { + write!(f, "Expected {} children, found {}", expected, actual) + } SchemaViolationError::NonRepeatingMatcherInListContext { .. 
} => { write!(f, "Non-repeating matcher used in list context") } @@ -435,34 +481,6 @@ impl fmt::Display for SchemaViolationError { } } -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum ChildrenCount { - SpecificCount(usize), - Range { min: usize, max: Option }, -} - -impl fmt::Display for ChildrenCount { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - ChildrenCount::SpecificCount(count) => write!(f, "{}", count), - ChildrenCount::Range { min, max } => match max { - Some(max_val) => write!(f, "between {} and {}", min, max_val), - None => write!(f, "at least {}", min), - }, - } - } -} - -impl ChildrenCount { - pub fn from_specific(count: usize) -> Self { - ChildrenCount::SpecificCount(count) - } - - pub fn from_range(min: usize, max: Option) -> Self { - ChildrenCount::Range { min, max } - } -} - /// Errors that occur during pretty-printing of validation errors. #[derive(Debug, Clone, Hash, PartialEq, Eq)] pub enum PrettyPrintError { @@ -576,6 +594,27 @@ fn validation_error_to_ariadne( ) .finish() } + SchemaViolationError::NotEnoughNodesForRepeatingParagraph { + schema_index: _, + input_index, + expected, + actual, + } => { + let node = find_node_by_index(tree.root_node(), *input_index); + let node_range = node.start_byte()..node.end_byte(); + + Report::build(ReportKind::Error, (filename, node_range.clone())) + .with_message("Not enough nodes for repeating paragraph") + .with_label( + Label::new((filename, node_range)) + .with_message(format!( + "Expected {} children but found {}.", + expected, actual + )) + .with_color(Color::Red), + ) + .finish() + } SchemaViolationError::NonRepeatingMatcherInListContext { schema_index, input_index, @@ -588,27 +627,27 @@ fn validation_error_to_ariadne( let input_range = input_node.start_byte()..input_node.end_byte(); Report::build(ReportKind::Error, (filename, input_range.clone())) - .with_message("Non-repeating matcher in repeating context") - .with_label( - Label::new((filename, input_range)) - 
.with_message( - "This input corresponds to a list node in the schema" - ) - .with_color(Color::Blue), - ) - .with_label( - Label::new((filename, schema_range)) - .with_message(format!( - "This matcher is in a list context but is not marked as repeating: '{}'", - schema_content - )) - .with_color(Color::Red), - ) - .with_help(r#" + .with_message("Non-repeating matcher in repeating context") + .with_label( + Label::new((filename, input_range)) + .with_message( + "This input corresponds to a list node in the schema" + ) + .with_color(Color::Blue), + ) + .with_label( + Label::new((filename, schema_range)) + .with_message(format!( + "This matcher is in a list context but is not marked as repeating: '{}'", + schema_content + )) + .with_color(Color::Red), + ) + .with_help(r#" You can mark a list node as repeating by adding a '{,} directly after the matcher, like - `myLabel:/foo/`{1,12} "#) - .finish() + .finish() } SchemaViolationError::ChildrenLengthMismatch { schema_index: _, @@ -633,7 +672,7 @@ You can mark a list node as repeating by adding a '{,} dir if parent.kind() == "list_item" { report = report.with_help( "If you want to allow any number of list items, use the {min,max} syntax \ - (e.g., `item:/pattern/`{1,} or `item:/pattern/`{0,})", + (e.g., `item:/pattern/`{1,} or `item:/pattern/`{0,})", ); } @@ -648,26 +687,26 @@ You can mark a list node as repeating by adding a '{,} dir let node_range = node.start_byte()..node.end_byte(); Report::build(ReportKind::Error, (filename, node_range.clone())) - .with_message("Nested list exceeds maximum depth") - .with_label( - Label::new((filename, node_range)) - .with_message(format!( - "List nesting exceeds maximum depth of {} level(s).", - max_depth, - )) - .with_color(Color::Red), - ) - .with_help( - "For schemas like:\n\ - - `num1:/\\d/`{1,}\n\ - \u{20} - `num2:/\\d/`{1,}{1,}\n\ - \n\ - You may need to adjust the repetition for the first matcher\n\ - to allow for the depth of the following ones. 
For example, you could\n\ - make that `num1:/\\d/`{1,}{1,}{1,} to allow for three levels of nesting (the one \ - below it, and the two allowed below that).", - ) - .finish() + .with_message("Nested list exceeds maximum depth") + .with_label( + Label::new((filename, node_range)) + .with_message(format!( + "List nesting exceeds maximum depth of {} level(s).", + max_depth, + )) + .with_color(Color::Red), + ) + .with_help( + "For schemas like:\n\ + - `num1:/\\d/`{1,}\n\ + \u{20} - `num2:/\\d/`{1,}{1,}\n\ + \n\ + You may need to adjust the repetition for the first matcher\n\ + to allow for the depth of the following ones. For example, you could\n\ + make that `num1:/\\d/`{1,}{1,}{1,} to allow for three levels of nesting (the one \ + below it, and the two allowed below that).", + ) + .finish() } SchemaViolationError::WrongListCount { schema_index, @@ -704,8 +743,8 @@ You can mark a list node as repeating by adding a '{,} dir ) .with_help( "The number of items in `matcher`{1,2} syntax refers to the number of \ - entries at the level of that matcher (deeper items are not included in \ - that count).", + entries at the level of that matcher (deeper items are not included in \ + that count).", ) .finish() } diff --git a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs index 178e93c..7eae84a 100644 --- a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs +++ b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs @@ -1,6 +1,8 @@ use tree_sitter::TreeCursor; -use crate::mdschema::validator::errors::{ChildrenCount, SchemaViolationError, ValidationError}; +use crate::mdschema::validator::errors::{ + ChildrenLengthRange, SchemaViolationError, ValidationError, +}; /// Compare the number of children between schema and input nodes. 
/// @@ -31,7 +33,7 @@ pub fn compare_node_children_lengths( ValidationError::SchemaViolation(SchemaViolationError::ChildrenLengthMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: ChildrenCount::from_specific(schema_child_count), + expected: ChildrenLengthRange(schema_child_count, schema_child_count), actual: input_child_count, }); diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index 53f6b1e..7a8be0f 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -134,7 +134,7 @@ impl ValidatorImpl for TextualContainerVsTextualContainerValidator { SchemaViolationError::ChildrenLengthMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: ChildrenCount::from_specific(expected_input_node_count), + expected: expected_input_node_count.into(), actual: actual_input_node_count, }, )); @@ -220,7 +220,8 @@ pub(super) struct ParagraphVsRepeatedMatcherParagraphValidator; impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - let result = ValidationResult::from_cursors(walker.schema_cursor(), &walker.input_cursor()); + let mut result = + ValidationResult::from_cursors(walker.schema_cursor(), &walker.input_cursor()); let mut schema_cursor = walker.schema_cursor().clone(); let mut input_cursor = walker.input_cursor().clone(); @@ -238,26 +239,59 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { ); } - // Go from the container to the first child in the container, and then - // iterate over the siblings at the same rate. 
- match ( - input_cursor.goto_first_child(), - schema_cursor.goto_first_child(), - ) { - (true, true) => { - // Great, keep going - } - (false, false) => { - // nothing to do - return result; - } - (true, false) => todo!(), - (false, true) => todo!(), + if !schema_cursor.goto_first_child() { + #[cfg(feature = "invariant_violations")] + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "for repeating matchers we should always have a first child in the schema" + ); } match Matcher::try_from_schema_cursor(&schema_cursor, walker.schema_str()) { Ok(matcher) if matcher.is_repeated() => { - todo!() + let mut matches = vec![]; + + let min_count = matcher.extras().min_items().unwrap_or(0); + let max_count = matcher.extras().max_items(); + + if !input_cursor.goto_first_child() { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NotEnoughNodesForRepeatingParagraph { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + expected: (min_count, max_count.unwrap_or(min_count)).into(), + actual: 0, + }, + )); + return result; + } + + loop { + let current_match = TextualVsTextualValidator::validate( + &walker.with_cursors(&schema_cursor, &input_cursor), + got_eof, + ); + matches.push(current_match.value().clone()); + + let prev_sibling = input_cursor.clone(); + if input_cursor.goto_next_sibling() && is_paragraph_node(&input_cursor.node()) { + // continue + } else { + input_cursor.reset_to(&prev_sibling); + break; + } + } + + if let Some(id) = matcher.id() { + result.set_match( + id, + serde_json::Value::Array(matches.into_iter().collect()), + ); + } + + result } _ => { #[cfg(feature = "invariant_violations")] diff --git a/src/mdschema/validator/node_walker/validators/lists.rs b/src/mdschema/validator/node_walker/validators/lists.rs index a473345..f72a08e 100644 --- a/src/mdschema/validator/node_walker/validators/lists.rs +++ b/src/mdschema/validator/node_walker/validators/lists.rs @@ -22,9 
+22,7 @@ use crate::mdschema::validator::{ }; use crate::{ invariant_violation, - mdschema::validator::errors::{ - ChildrenCount, SchemaError, SchemaViolationError, ValidationError, - }, + mdschema::validator::errors::{SchemaError, SchemaViolationError, ValidationError}, }; use log::trace; use serde_json::json; @@ -224,7 +222,7 @@ impl ValidatorImpl for ListVsListValidator { SchemaViolationError::ChildrenLengthMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: ChildrenCount::from_range(min_items, Some(max_items)), + expected: (min_items, max_items).into(), actual: validate_so_far + 1, // At least one more }, )); @@ -250,7 +248,7 @@ impl ValidatorImpl for ListVsListValidator { SchemaViolationError::ChildrenLengthMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: ChildrenCount::from_range(min_items, max_items), + expected: (min_items, max_items.unwrap_or(min_items)).into(), actual: validate_so_far, }, )); @@ -427,7 +425,7 @@ impl ValidatorImpl for ListVsListValidator { schema_index: at_list_schema_cursor.descendant_index(), input_index: at_list_input_cursor.descendant_index(), // +1 because we need to include this first node that we are currently on - expected: ChildrenCount::from_specific(literal_chunk_count), + expected: literal_chunk_count.into(), actual: available_literal_items, }, )); @@ -444,7 +442,7 @@ impl ValidatorImpl for ListVsListValidator { schema_index: at_list_schema_cursor.descendant_index(), input_index: at_list_input_cursor.descendant_index(), // +1 because we need to include this first node that we are currently on - expected: ChildrenCount::from_specific(remaining_schema_nodes + 1), + expected: (remaining_schema_nodes + 1).into(), actual: remaining_input_nodes + 1, }, )); @@ -774,10 +772,10 @@ mod tests { use super::{ ListVsListValidator, ensure_at_first_list_item, extract_repeated_matcher_from_list_item, }; + use 
crate::mdschema::validator::errors::ChildrenLengthRange; use crate::mdschema::validator::{ errors::{ - ChildrenCount, MalformedStructureKind, NodeContentMismatchKind, SchemaViolationError, - ValidationError, + MalformedStructureKind, NodeContentMismatchKind, SchemaViolationError, ValidationError, }, node_walker::ValidationResult, ts_types::*, @@ -1151,7 +1149,7 @@ Footer: test (footer isn't validated with_list_vs_list) SchemaViolationError::ChildrenLengthMismatch { schema_index: 1, input_index: 1, - expected: ChildrenCount::from_specific(6), + expected: 6.into(), actual: 3, } )] @@ -1184,7 +1182,7 @@ Footer: test (footer isn't validated with_list_vs_list) SchemaViolationError::ChildrenLengthMismatch { schema_index: 1, input_index: 1, - expected: ChildrenCount::from_specific(3), + expected: 3.into(), actual: 6, } )] @@ -1600,7 +1598,7 @@ Footer: test (footer isn't validated with_list_vs_list) SchemaViolationError::ChildrenLengthMismatch { schema_index: 2, input_index: 6, - expected: ChildrenCount::from_range(0, Some(2)), + expected: (0, 2).into(), actual: 3, } )], @@ -1798,10 +1796,7 @@ Footer: test (footer isn't validated with_list_vs_list) matches!( &result.errors()[0], ValidationError::SchemaViolation(SchemaViolationError::ChildrenLengthMismatch { - expected: ChildrenCount::Range { - min: 1, - max: Some(1) - }, + expected: ChildrenLengthRange(1, 1), actual: 2, // We detect "at least one more" = 2 .. 
}) diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validator/node_walker/validators/nodes.rs index b8a6a01..913f139 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs +++ b/src/mdschema/validator/node_walker/validators/nodes.rs @@ -240,7 +240,7 @@ mod tests { use super::super::test_utils::ValidatorTester; use super::NodeVsNodeValidator; use crate::mdschema::validator::{ - errors::{ChildrenCount, SchemaViolationError, ValidationError}, + errors::{SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, }; @@ -434,11 +434,7 @@ mod tests { ValidationError::SchemaViolation( SchemaViolationError::ChildrenLengthMismatch { expected, .. }, ) => { - assert_eq!( - expected, - &ChildrenCount::SpecificCount(0), - "expected should be 0 for empty schema" - ); + assert_eq!(expected.0, 0, "expected should be 0 for empty schema"); } _ => panic!("Expected ChildrenLengthMismatch error, got: {:?}", error), }, diff --git a/src/mdschema/validator/validator.rs b/src/mdschema/validator/validator.rs index 9a6c274..e39fea2 100644 --- a/src/mdschema/validator/validator.rs +++ b/src/mdschema/validator/validator.rs @@ -255,7 +255,7 @@ impl ValidatorState for Validator { mod tests { use serde_json::json; - use crate::mdschema::validator::errors::{ChildrenCount, SchemaError, SchemaViolationError}; + use crate::mdschema::validator::errors::{SchemaError, SchemaViolationError}; use super::*; @@ -435,7 +435,7 @@ fooobar expected, actual, }) => { - assert_eq!(*expected, ChildrenCount::from_specific(3)); + assert_eq!(*expected, 3); assert_eq!(*actual, 2); assert_eq!(*schema_index, 0); } @@ -962,7 +962,7 @@ Footer: goodbye expected, .. 
}) => { - assert_eq!(*expected, ChildrenCount::from_specific(4)); + assert_eq!(*expected, 4); assert_eq!(*actual, 5); } _ => panic!("Expected ChildrenLengthMismatch error, got {:?}", errors[0]), diff --git a/tests/misc.rs b/tests/misc.rs index 6f75adb..746ba4e 100644 --- a/tests/misc.rs +++ b/tests/misc.rs @@ -3,9 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ - ChildrenCount, SchemaViolationError, ValidationError, -}; +use mdvalidate::mdschema::validator::errors::{SchemaViolationError, ValidationError}; test_case!( node_heading_and_paragraph, @@ -32,7 +30,7 @@ test_case!( SchemaViolationError::ChildrenLengthMismatch { schema_index: 0, input_index: 0, - expected: ChildrenCount::SpecificCount(0), + expected: 0.into(), actual: 1, } )] diff --git a/tests/rulers.rs b/tests/rulers.rs index 9951c1e..9d0d788 100644 --- a/tests/rulers.rs +++ b/tests/rulers.rs @@ -4,7 +4,7 @@ use serde_json::json; mod helpers; use mdvalidate::mdschema::validator::errors::{ - ChildrenCount, SchemaViolationError, ValidationError, + SchemaViolationError, ValidationError, }; test_case!(ruler_dashes, r#"---"#, r#"---"#, json!({}), vec![]); @@ -18,7 +18,7 @@ test_case!( SchemaViolationError::ChildrenLengthMismatch { schema_index: 0, input_index: 0, - expected: ChildrenCount::SpecificCount(1), + expected: 1.into(), actual: 0, } )] From ed43318a9bc5254acc85b92705833362c09fd72c Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 16:11:24 -0500 Subject: [PATCH 04/33] more progress on repeating paragraphs --- src/mdschema/validator/errors.rs | 19 +++++- .../validator/matcher/matcher_extras.rs | 10 +++ .../node_walker/validators/containers.rs | 63 +++++++++++++++---- .../validator/node_walker/validators/lists.rs | 30 ++++++--- tests/rulers.rs | 4 +- 5 files changed, 100 insertions(+), 26 deletions(-) diff --git a/src/mdschema/validator/errors.rs b/src/mdschema/validator/errors.rs index 4e0fd7d..c435375 100644 --- 
a/src/mdschema/validator/errors.rs +++ b/src/mdschema/validator/errors.rs @@ -1,5 +1,8 @@ use crate::mdschema::validator::{ - matcher::{matcher::*, matcher_extras::MatcherExtrasError}, + matcher::{ + matcher::*, + matcher_extras::{MatcherExtras, MatcherExtrasError}, + }, validator::{Validator, ValidatorState}, }; use ariadne::{Color, Label, Report, ReportKind, Source}; @@ -408,6 +411,20 @@ impl From for ChildrenLengthRange { } } +impl ChildrenLengthRange { + /// Build a range from optional min/max bounds (defaults to 0 for missing min). + pub fn from_optional_bounds(min: Option, max: Option) -> Self { + let min = min.unwrap_or(0); + let max = max.unwrap_or(min); + ChildrenLengthRange(min, max) + } + + /// Build a range from a matcher's extras. + pub fn from_matcher_extras(extras: &MatcherExtras) -> Self { + ChildrenLengthRange::from_optional_bounds(extras.min_items(), extras.max_items()) + } +} + impl PartialEq for ChildrenLengthRange { fn eq(&self, other: &usize) -> bool { self.0 == *other && self.1 == *other diff --git a/src/mdschema/validator/matcher/matcher_extras.rs b/src/mdschema/validator/matcher/matcher_extras.rs index 61e936b..8dc5ecf 100644 --- a/src/mdschema/validator/matcher/matcher_extras.rs +++ b/src/mdschema/validator/matcher/matcher_extras.rs @@ -201,11 +201,21 @@ impl MatcherExtras { self.min_items } + /// Return minimum items or the provided default. + pub fn min_items_or(&self, default: usize) -> usize { + self.min_items.unwrap_or(default) + } + /// Return optional maximum number of items at this list level pub fn max_items(&self) -> Option { self.max_items } + /// Return maximum items or the provided default. 
+ pub fn max_items_or(&self, default: usize) -> usize { + self.max_items.unwrap_or(default) + } + /// Whether min/max constraints were specified pub fn had_min_max(&self) -> bool { self.had_min_max diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index 7a8be0f..0bba27e 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -90,11 +90,13 @@ impl ValidatorImpl for TextualContainerVsTextualContainerValidator { } match count_non_literal_matchers_in_children(&schema_cursor, walker.schema_str()) { - Ok(non_repeating_matchers_count) if non_repeating_matchers_count > 1 && got_eof => { + Ok(non_literal_matchers_in_children) + if non_literal_matchers_in_children > 1 && got_eof => + { result.add_error(ValidationError::SchemaError( SchemaError::MultipleMatchersInNodeChildren { schema_index: schema_cursor.descendant_index(), - received: non_repeating_matchers_count, + received: non_literal_matchers_in_children, }, )) } @@ -253,15 +255,15 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { Ok(matcher) if matcher.is_repeated() => { let mut matches = vec![]; - let min_count = matcher.extras().min_items().unwrap_or(0); - let max_count = matcher.extras().max_items(); + let extras = matcher.extras(); + let expected_range = ChildrenLengthRange::from_matcher_extras(extras); if !input_cursor.goto_first_child() { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NotEnoughNodesForRepeatingParagraph { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: (min_count, max_count.unwrap_or(min_count)).into(), + expected: expected_range, actual: 0, }, )); @@ -273,10 +275,16 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); + + if 
current_match.has_errors() { + result.join_errors(current_match.errors()); + return result; + } + matches.push(current_match.value().clone()); let prev_sibling = input_cursor.clone(); - if input_cursor.goto_next_sibling() && is_paragraph_node(&input_cursor.node()) { + if input_cursor.goto_next_sibling() { // continue } else { input_cursor.reset_to(&prev_sibling); @@ -285,10 +293,7 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { } if let Some(id) = matcher.id() { - result.set_match( - id, - serde_json::Value::Array(matches.into_iter().collect()), - ); + result.set_match(id, serde_json::Value::Array(matches.into_iter().collect())); } result @@ -359,8 +364,12 @@ mod tests { use super::{TextualContainerVsTextualContainerValidator, is_repeated_matcher_paragraph}; use crate::mdschema::validator::{ - node_pos_pair::NodePosPair, node_walker::validators::test_utils::ValidatorTester, - ts_types::*, ts_utils::parse_markdown, + node_pos_pair::NodePosPair, + node_walker::validators::{ + containers::ParagraphVsRepeatedMatcherParagraphValidator, test_utils::ValidatorTester, + }, + ts_types::*, + ts_utils::parse_markdown, }; #[test] @@ -509,4 +518,34 @@ mod tests { assert_eq!(errors, vec![]); assert_eq!(value, json!({"a": "a", "b": "b"})); } + + #[test] + fn test_paragraph_vs_repeated_matcher_paragraph_simple() { + let schema_str = r#" +`items:/.*/`{,} +"#; + let input_str = r#" +foo +bar +buzz +"#; + + let result = ValidatorTester::::from_strs( + schema_str, input_str, + ) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + let errors = result.errors().to_vec(); + let value = result.value().clone(); + + // assert_eq!( + // *result.farthest_reached_pos(), + // NodePosPair::from_pos(12, 10) + // ); + // assert_eq!(errors, vec![]); + assert_eq!(value, json!({"items": ["foo", "bar", "buzz"]})); + } } diff --git a/src/mdschema/validator/node_walker/validators/lists.rs 
b/src/mdschema/validator/node_walker/validators/lists.rs index f72a08e..4c842cb 100644 --- a/src/mdschema/validator/node_walker/validators/lists.rs +++ b/src/mdschema/validator/node_walker/validators/lists.rs @@ -22,7 +22,9 @@ use crate::mdschema::validator::{ }; use crate::{ invariant_violation, - mdschema::validator::errors::{SchemaError, SchemaViolationError, ValidationError}, + mdschema::validator::errors::{ + ChildrenLengthRange, SchemaError, SchemaViolationError, ValidationError, + }, }; use log::trace; use serde_json::json; @@ -134,8 +136,10 @@ impl ValidatorImpl for ListVsListValidator { match extract_repeated_matcher_from_list_item(&schema_cursor, walker.schema_str()) { // We were able to find a valid repeated matcher in the schema list item. Some(Ok(matcher)) => { - let min_items = matcher.extras().min_items().unwrap_or(0); - let max_items = matcher.extras().max_items(); + let extras = matcher.extras(); + let min_items_option = extras.min_items(); + let min_items = extras.min_items_or(0); + let max_items = extras.max_items(); trace!( "Found repeated matcher: id={:?}, min_items={}, max_items={:?}, variable_length={}", matcher.id(), @@ -159,7 +163,7 @@ impl ValidatorImpl for ListVsListValidator { return result; } - let mut values_at_level = Vec::with_capacity(max_items.unwrap_or(1)); + let mut values_at_level = Vec::with_capacity(extras.max_items_or(1)); let mut validate_so_far = 0; loop { @@ -200,12 +204,12 @@ impl ValidatorImpl for ListVsListValidator { ); // If we've now validated the max number of items, check if there are more - if let Some(max_items) = max_items - && validate_so_far == max_items + if let Some(max_items_value) = max_items + && validate_so_far == max_items_value { trace!( "Reached max items limit ({}), checking if there are more items", - max_items + max_items_value ); // Check if there are more items beyond the max @@ -216,13 +220,16 @@ impl ValidatorImpl for ListVsListValidator { // Report error immediately - extra items won't 
disappear trace!( "Error: More items than max allowed ({} > {}), early exit", - "at least one more", max_items + "at least one more", max_items_value ); result.add_error(ValidationError::SchemaViolation( SchemaViolationError::ChildrenLengthMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: (min_items, max_items).into(), + expected: ChildrenLengthRange::from_optional_bounds( + min_items_option, + Some(max_items_value), + ), actual: validate_so_far + 1, // At least one more }, )); @@ -248,7 +255,10 @@ impl ValidatorImpl for ListVsListValidator { SchemaViolationError::ChildrenLengthMismatch { schema_index: schema_cursor.descendant_index(), input_index: input_cursor.descendant_index(), - expected: (min_items, max_items.unwrap_or(min_items)).into(), + expected: ChildrenLengthRange::from_optional_bounds( + min_items_option, + max_items, + ), actual: validate_so_far, }, )); diff --git a/tests/rulers.rs b/tests/rulers.rs index 9d0d788..744c06d 100644 --- a/tests/rulers.rs +++ b/tests/rulers.rs @@ -3,9 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ - SchemaViolationError, ValidationError, -}; +use mdvalidate::mdschema::validator::errors::{SchemaViolationError, ValidationError}; test_case!(ruler_dashes, r#"---"#, r#"---"#, json!({}), vec![]); From 9c8f76fba22c648885f5e1958fd29033bef0e08d Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 18:01:38 -0500 Subject: [PATCH 05/33] switch to instance based version --- .../src/content/docs/matchers/02-matchers.mdx | 56 ++++ src/mdschema/validator/matcher/matcher.rs | 159 +++++++--- .../count_non_literal_matchers_in_children.rs | 4 +- .../node_walker/helpers/curly_matchers.rs | 2 +- .../validator/node_walker/node_walker.rs | 2 +- .../validator/node_walker/validators/code.rs | 3 +- .../node_walker/validators/containers.rs | 294 ++++++++++++------ .../node_walker/validators/headings.rs | 11 +- 
.../validator/node_walker/validators/links.rs | 3 +- .../validator/node_walker/validators/lists.rs | 15 +- .../node_walker/validators/matchers.rs | 16 +- .../validator/node_walker/validators/mod.rs | 14 +- .../validator/node_walker/validators/nodes.rs | 145 +++++---- .../node_walker/validators/quotes.rs | 7 +- .../node_walker/validators/tables.rs | 7 +- .../node_walker/validators/textual.rs | 5 +- src/mdschema/validator/validator.rs | 2 +- 17 files changed, 508 insertions(+), 237 deletions(-) diff --git a/docs/src/content/docs/matchers/02-matchers.mdx b/docs/src/content/docs/matchers/02-matchers.mdx index df99f25..3f515ec 100644 --- a/docs/src/content/docs/matchers/02-matchers.mdx +++ b/docs/src/content/docs/matchers/02-matchers.mdx @@ -162,6 +162,62 @@ To match inline code blocks literally instead of treating them as matchers, add valid={false} /> +# Everything Matchers + +If a matcher has no regex pattern (just a label in backticks), it will match everything available in the current context as an identity function. The syntax is simply `` `label` ``, where the label follows the naming rules below. + +## Naming Rules + +Matcher labels (both for regex matchers and everything matchers) must follow these rules: + +- Must contain only alphanumeric characters (a-z, A-Z, 0-9), hyphens (`-`), and underscores (`_`) +- Cannot contain spaces or other special characters +- Valid examples: `user_name`, `item-count`, `id123`, `MyData` +- Invalid examples: `user name` (space), `data@field` (special char), `item.count` (period) + +## Behavior + +Everything matchers act as an identity function - they **always** match and return exactly what was passed to them, including special characters, spaces, and any other content: + + + + + + + +When used in a paragraph context, everything matchers will capture all spanning elements including formatting like italics: + + + + + +Everything matchers never fail to match - they accept any input and return it unchanged under the specified label. 
+ ### Escaping the Exclamation Mark diff --git a/src/mdschema/validator/matcher/matcher.rs b/src/mdschema/validator/matcher/matcher.rs index 158b1cb..c5f71b0 100644 --- a/src/mdschema/validator/matcher/matcher.rs +++ b/src/mdschema/validator/matcher/matcher.rs @@ -12,8 +12,10 @@ use crate::mdschema::validator::{ ts_utils::{get_next_node, get_node_and_next_node, get_node_text}, }; +static ID_PATTERN: LazyLock = LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9-_]+$").unwrap()); + static REGEX_MATCHER_PATTERN: LazyLock = - LazyLock::new(|| Regex::new(r"^(((?P[a-zA-Z0-9-_]+)):)?\/(?P.+?)\/").unwrap()); + LazyLock::new(|| Regex::new(r"^(?:(?P[a-zA-Z0-9-_]+):)?(?:\/(?P.+?)\/|(?P[a-zA-Z0-9-_]+))$").unwrap()); static RANGE_PATTERN: LazyLock = LazyLock::new(|| Regex::new(r"\{(\d*),(\d*)\}").unwrap()); @@ -90,7 +92,7 @@ fn extract_item_count_limits(text: &str) -> (Option, Option, bool) pub struct Matcher { id: Option, /// A compiled regex for the pattern. - pattern: MatcherType, + kind: MatcherKind, /// Extra flags, which we receive via extra text that corresponds to the matcher flags: HashSet, /// Extra configuration options @@ -100,13 +102,27 @@ pub struct Matcher { } #[derive(Debug, Clone)] -pub struct MatcherType { - regex: Regex, +pub enum MatcherKind { + Regex(Regex), + All, +} + +impl MatcherKind { + pub fn from_regex(regex: Regex) -> Self { + MatcherKind::Regex(regex) + } + + pub fn all() -> Self { + MatcherKind::All + } } -impl fmt::Display for MatcherType { +impl fmt::Display for MatcherKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.regex.as_str()) + match self { + MatcherKind::Regex(regex) => write!(f, "{}", regex.as_str()), + MatcherKind::All => write!(f, "all"), + } } } @@ -127,14 +143,14 @@ impl Matcher { pub fn new( id: Option, flags: HashSet, - pattern: MatcherType, + r#type: MatcherKind, extras: MatcherExtras, original_str_len: usize, ) -> Self { Matcher { id, flags, - pattern, + kind: r#type, extras, original_str_len, 
} @@ -142,7 +158,7 @@ impl Matcher { pub fn new_with_empty_flags( id: Option, - pattern: MatcherType, + pattern: MatcherKind, extras: MatcherExtras, original_str_len: usize, ) -> Self { @@ -176,7 +192,7 @@ impl Matcher { Some(caps) => extract_id_and_pattern(&caps, &pattern_str)?, None => { return Err(MatcherError::MatcherInteriorRegexInvalid(format!( - "Expected format: 'id:/regex/', got {}", // TODO: don't hard code what we expect + "Expected format: 'id:/regex/' or 'id', got {}", pattern_str ))); } @@ -210,10 +226,10 @@ impl Matcher { schema_cursor: &TreeCursor, schema_str: &str, ) -> Result { - #[cfg(feature = "invariant_violations")] - if !is_inline_code_node(&schema_cursor.node()) { - invariant_violation!("expected inline code node for extracting a matcher"); - } + // #[cfg(feature = "invariant_violations")] + // if !is_inline_code_node(&schema_cursor.node()) { + // invariant_violation!("expected inline code node for extracting a matcher"); + // } let pattern_str = get_node_text(&schema_cursor.node(), schema_str); let next_node = get_next_node(schema_cursor); @@ -227,9 +243,12 @@ impl Matcher { /// Get an actual match string for a given text, if it matches. pub fn match_str<'a>(&self, text: &'a str) -> Option<&'a str> { - match self.pattern.regex.find(text) { - Some(mat) => Some(&text[mat.start()..mat.end()]), - None => None, + match &self.kind { + MatcherKind::Regex(regex) => { + let mat = regex.find(text)?; + Some(&text[mat.start()..mat.end()]) + } + MatcherKind::All => Some(text), } } @@ -249,8 +268,8 @@ impl Matcher { } /// Get a reference to the pattern - pub fn pattern(&self) -> &MatcherType { - &self.pattern + pub fn pattern(&self) -> &MatcherKind { + &self.kind } /// The original string length of the matcher including the `s. 
@@ -272,11 +291,15 @@ impl Matcher { _ => true, } } + + pub fn kind(&self) -> &MatcherKind { + &self.kind + } } impl PartialEq for Matcher { fn eq(&self, other: &Self) -> bool { - self.id == other.id && format!("{}", self.pattern) == format!("{}", other.pattern) + self.id == other.id && format!("{}", self.kind) == format!("{}", other.kind) } } @@ -284,40 +307,54 @@ impl PartialEq for Matcher { fn extract_id_and_pattern( captures: ®ex::Captures, pattern: &str, -) -> Result<(Option, MatcherType), MatcherError> { - let id = captures.name("id").map(|m| m.as_str().to_string()); +) -> Result<(Option, MatcherKind), MatcherError> { + // Check if we have a bare ID (e.g., `word`) + if let Some(bare_id) = captures.name("bare_id") { + let id = bare_id.as_str().to_string(); + return Ok((Some(id), MatcherKind::all())); + } + + // Otherwise, we have a regex pattern (e.g., `id:/regex/` or `/regex/`) + let id = captures.name("id_with_regex").map(|m| m.as_str().to_string()); let regex_pattern = captures .name("regex") .map(|m| m.as_str().to_string()) .ok_or_else(|| { MatcherError::MatcherInteriorRegexInvalid(format!( - "Expected format: 'id:/regex/', got {}", + "Expected format: 'id:/regex/' or 'id', got {}", pattern )) })?; - let matcher = MatcherType { - regex: Regex::new(&format!("^{}", regex_pattern)).map_err(|e| { - MatcherError::MatcherInteriorRegexInvalid(format!("Invalid regex pattern: {}", e)) - })?, - }; + // Create a regex matcher from the pattern + let matcher = MatcherKind::from_regex(Regex::new(&format!("^{}", regex_pattern)).map_err(|e| { + MatcherError::MatcherInteriorRegexInvalid(format!("Invalid regex pattern: {}", e)) + })?); Ok((id, matcher)) } impl fmt::Display for Matcher { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let regex_str = self.pattern.regex.as_str(); - // The regex is stored as "^", so remove the leading ^ - let pattern_str = if regex_str.starts_with('^') { - ®ex_str[1..] 
- } else { - regex_str - }; - - match &self.id { - Some(id) => write!(f, "{}:/{}/", id, pattern_str), - None => write!(f, "/{}/", pattern_str), + match &self.kind { + MatcherKind::Regex(regex) => { + let regex_str = regex.as_str(); + // The regex is stored as "^", so remove the leading ^ + let pattern_str = if regex_str.starts_with('^') { + ®ex_str[1..] + } else { + regex_str + }; + + match &self.id { + Some(id) => write!(f, "{}:/{}/", id, pattern_str), + None => write!(f, "/{}/", pattern_str), + } + } + MatcherKind::All => match &self.id { + Some(id) => write!(f, "{}:/all/", id), + None => write!(f, "/all/"), + }, } } } @@ -384,14 +421,28 @@ pub fn extract_text_matcher(cursor: &TreeCursor, str: &str) -> Result { + assert_eq!(matcher.id, Some("word".to_string())); + assert_eq!(matcher.match_str("hello world"), Some("hello world")); + assert_eq!(matcher.match_str("1234"), Some("1234")); + assert_eq!(matcher.match_str("!@#$"), Some("!@#$")); + } + kind => panic!("Unexpected matcher kind: {:?}", kind), + } + } + + #[test] + fn test_matcher_creation_and_matching_regex() { let matcher = Matcher::try_from_pattern_and_suffix_str("`word:/\\w+/`", None).unwrap(); assert_eq!(matcher.id, Some("word".to_string())); assert_eq!(matcher.match_str("hello world"), Some("hello")); @@ -399,10 +450,32 @@ mod tests { assert_eq!(matcher.match_str("!@#$"), None); } + #[test] + fn test_all_matcher_matches_everything() { + let matcher = Matcher::try_from_pattern_and_suffix_str("`my_id`", None).unwrap(); + assert_eq!(matcher.id, Some("my_id".to_string())); + + // All matcher acts as identity function - always matches and returns exactly what was passed + assert_eq!(matcher.match_str("hello"), Some("hello")); + assert_eq!(matcher.match_str("test123"), Some("test123")); + assert_eq!(matcher.match_str("under_score"), Some("under_score")); + assert_eq!(matcher.match_str("MixedCase123"), Some("MixedCase123")); + + // Should match special characters too - identity function + 
assert_eq!(matcher.match_str("@*&^R"), Some("@*&^R")); + assert_eq!(matcher.match_str("!test"), Some("!test")); + assert_eq!(matcher.match_str("-dash"), Some("-dash")); + + // Matches everything including spaces and special characters + assert_eq!(matcher.match_str("valid-later"), Some("valid-later")); + assert_eq!(matcher.match_str("test@symbol"), Some("test@symbol")); + assert_eq!(matcher.match_str("anything at all!"), Some("anything at all!")); + } + #[test] fn test_matcher_invalid_pattern() { - // Test error handling for invalid pattern using try_from_pattern_and_suffix_str - let result = Matcher::try_from_pattern_and_suffix_str("`invalid_pattern`", None); + // Test error handling for truly invalid pattern (invalid chars for ID, not a regex) + let result = Matcher::try_from_pattern_and_suffix_str("`invalid pattern with spaces`", None); assert!(result.is_err()); match result.as_ref().unwrap_err() { MatcherError::MatcherInteriorRegexInvalid(_) => { diff --git a/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs b/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs index 274d549..aa98a6c 100644 --- a/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs +++ b/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs @@ -17,14 +17,14 @@ use crate::mdschema::validator::{ /// /// For example, /// -/// ``` +/// ```text /// `test:/test/`{,} /// ``` /// /// Contains a document with one child, which is a repeated paragraph matcher, /// whereas /// -/// ``` +/// ```text /// `test:/test/` test /// ``` /// diff --git a/src/mdschema/validator/node_walker/helpers/curly_matchers.rs b/src/mdschema/validator/node_walker/helpers/curly_matchers.rs index ce812f7..4358abb 100644 --- a/src/mdschema/validator/node_walker/helpers/curly_matchers.rs +++ b/src/mdschema/validator/node_walker/helpers/curly_matchers.rs @@ -17,7 +17,7 @@ pub fn 
extract_matcher_from_curly_delineated_text( let suffix = caps.name("suffix").map(|m| m.as_str()); Some(Matcher::try_from_pattern_and_suffix_str( - &format!("`{}`{}", matcher_str, suffix.unwrap_or("")), + &format!("`{}`", matcher_str), suffix, )) } diff --git a/src/mdschema/validator/node_walker/node_walker.rs b/src/mdschema/validator/node_walker/node_walker.rs index b5a3768..71ff978 100644 --- a/src/mdschema/validator/node_walker/node_walker.rs +++ b/src/mdschema/validator/node_walker/node_walker.rs @@ -57,7 +57,7 @@ impl<'a, S: ValidatorState> NodeWalker<'a, S> { .farthest_reached_pos() .walk_cursors_to_pos(schema_cursor, input_cursor); - let validation_result = NodeVsNodeValidator::validate(&walker, got_eof); + let validation_result = NodeVsNodeValidator::default().validate(&walker, got_eof); self.state.push_validation_result(validation_result.clone()); diff --git a/src/mdschema/validator/node_walker/validators/code.rs b/src/mdschema/validator/node_walker/validators/code.rs index 12babe2..48d50d6 100644 --- a/src/mdschema/validator/node_walker/validators/code.rs +++ b/src/mdschema/validator/node_walker/validators/code.rs @@ -51,10 +51,11 @@ use crate::mdschema::validator::{ /// /// Note you cannot yet enforce regex on the actual code content. /// ``` +#[derive(Default)] pub(super) struct CodeVsCodeValidator; impl ValidatorImpl for CodeVsCodeValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let _got_eof = got_eof; validate_code_vs_code_impl(walker) } diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index 0bba27e..823d2a9 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -5,9 +5,12 @@ //! 
paragraphs/emphasis and validates them with matcher support and link-aware //! handling. use log::trace; +use serde_json::Value; use tree_sitter::TreeCursor; +use crate::mdschema::validator::matcher::matcher::MatcherKind; use crate::mdschema::validator::node_walker::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children; +use crate::mdschema::validator::ts_utils::get_node_text; use crate::mdschema::validator::validator_walker::ValidatorWalker; use crate::mdschema::validator::{ errors::*, @@ -57,10 +60,21 @@ use crate::{compare_node_kinds_check, invariant_violation}; /// check that there is a text node in the input, maybe error, and if there is, /// validate that the contents of the rest of it is the same. /// - Then move to the next node pair, hopping two nodes at once for the schema node. -pub(super) struct TextualContainerVsTextualContainerValidator; +#[derive(Default)] +pub(super) struct ContainerVsContainerValidator { + allow_repeating: bool, +} + +impl ContainerVsContainerValidator { + pub fn with_allow_repeating() -> Self { + Self { + allow_repeating: true, + } + } +} -impl ValidatorImpl for TextualContainerVsTextualContainerValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { +impl ValidatorImpl for ContainerVsContainerValidator { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); @@ -86,7 +100,8 @@ impl ValidatorImpl for TextualContainerVsTextualContainerValidator { ); if is_repeated_matcher_paragraph(&schema_cursor, walker.schema_str()) { - return ParagraphVsRepeatedMatcherParagraphValidator::validate(walker, got_eof); + return ParagraphVsRepeatedMatcherParagraphValidator::default() + .validate(walker, got_eof); } match count_non_literal_matchers_in_children(&schema_cursor, walker.schema_str()) { @@ -160,15 +175,11 @@ impl ValidatorImpl for 
TextualContainerVsTextualContainerValidator { let pair_result = if both_are_link_nodes(&schema_cursor.node(), &input_cursor.node()) || both_are_image_nodes(&schema_cursor.node(), &input_cursor.node()) { - LinkVsLinkValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ) + LinkVsLinkValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof) } else { - let new_result = TextualVsTextualValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let new_result = TextualVsTextualValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); new_result.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); new_result }; @@ -218,10 +229,11 @@ impl ValidatorImpl for TextualContainerVsTextualContainerValidator { /// ] /// } /// ``` +#[derive(Default)] pub(super) struct ParagraphVsRepeatedMatcherParagraphValidator; impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), &walker.input_cursor()); @@ -241,6 +253,12 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { ); } + let next_schema_cursor = { + let mut schema_cursor = schema_cursor.clone(); + schema_cursor.goto_next_sibling(); + schema_cursor + }; + if !schema_cursor.goto_first_child() { #[cfg(feature = "invariant_violations")] invariant_violation!( @@ -256,35 +274,20 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { let mut matches = vec![]; let extras = matcher.extras(); - let expected_range = ChildrenLengthRange::from_matcher_extras(extras); - - if !input_cursor.goto_first_child() { - result.add_error(ValidationError::SchemaViolation( - 
SchemaViolationError::NotEnoughNodesForRepeatingParagraph { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - expected: expected_range, - actual: 0, - }, - )); - return result; - } - loop { - let current_match = TextualVsTextualValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let n = extras.max_items().unwrap_or(usize::MAX); + for _ in 0..n { + // compare the ENTIRE text of the paragraph + let input_paragraph_text = + get_node_text(&input_cursor.node(), walker.input_str()); - if current_match.has_errors() { - result.join_errors(current_match.errors()); - return result; + match matcher.match_str(input_paragraph_text) { + Some(matched) => matches.push(matched), + None => {} } - matches.push(current_match.value().clone()); - let prev_sibling = input_cursor.clone(); - if input_cursor.goto_next_sibling() { + if input_cursor.goto_next_sibling() && is_paragraph_node(&input_cursor.node()) { // continue } else { input_cursor.reset_to(&prev_sibling); @@ -292,8 +295,23 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { } } + if matches.len() < extras.min_items().unwrap_or(0) { + todo!("Too few items for repeating matcher"); + } + + input_cursor.goto_next_sibling(); + result.sync_cursor_pos(&next_schema_cursor, &input_cursor); + if let Some(id) = matcher.id() { - result.set_match(id, serde_json::Value::Array(matches.into_iter().collect())); + result.set_match( + id, + serde_json::Value::Array( + matches + .iter() + .map(|s| Value::String(s.to_string())) + .collect(), + ), + ); } result @@ -317,14 +335,14 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { /// /// For example, /// -/// ``` +/// ```md /// `test:/test/`{,} /// ``` /// /// Contains a document with one child, which is a repeated paragraph matcher, /// whereas /// -/// ``` +/// ```md /// `test:/test/` test /// ``` /// @@ -352,7 +370,7 @@ fn 
is_repeated_matcher_paragraph(schema_cursor: &TreeCursor, schema_str: &str) - schema_cursor.goto_first_child(); // note we know there is one because we checked above match Matcher::try_from_schema_cursor(&schema_cursor, schema_str) { - Ok(matcher) if matcher.is_repeated() => true, + Ok(matcher) if matcher.is_repeated() && matches!(matcher.kind(), MatcherKind::All) => true, Ok(_) => false, Err(_) => false, } @@ -362,8 +380,9 @@ fn is_repeated_matcher_paragraph(schema_cursor: &TreeCursor, schema_str: &str) - mod tests { use serde_json::json; - use super::{TextualContainerVsTextualContainerValidator, is_repeated_matcher_paragraph}; + use super::{ContainerVsContainerValidator, is_repeated_matcher_paragraph}; use crate::mdschema::validator::{ + errors::{SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, node_walker::validators::{ containers::ParagraphVsRepeatedMatcherParagraphValidator, test_utils::ValidatorTester, @@ -395,7 +414,7 @@ mod tests { #[test] fn test_is_repeated_matcher_paragraph_simple_repeating_matcher() { - let schema_str = "`test:/test/`{,}"; + let schema_str = "`test`{,}"; let schema_tree = parse_markdown(schema_str).unwrap(); let mut schema_cursor = schema_tree.walk(); schema_cursor.goto_first_child(); @@ -405,7 +424,7 @@ mod tests { #[test] fn test_is_repeated_matcher_paragraph_matcher_non_repeating() { - let schema_str = "`test:/test/` test"; + let schema_str = "`test` test"; let schema_tree = parse_markdown(schema_str).unwrap(); let mut schema_cursor = schema_tree.walk(); schema_cursor.goto_first_child(); @@ -438,15 +457,16 @@ mod tests { let schema_str = "# Test Wolf [hi](https://example.com)"; let input_str = "# Test Wolf [hi](https://foobar.com)"; - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - .goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) - 
.validate_complete(); + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| { + assert!(is_heading_content_node(s) && is_heading_content_node(i)) + }) + .validate_complete(); assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(9, 9)); assert!(!result.errors().is_empty()); @@ -458,15 +478,16 @@ mod tests { let schema_str = "# Test Wolf"; let input_str = "# Test Wolf"; - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - .goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) - .validate_complete(); + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| { + assert!(is_heading_content_node(s) && is_heading_content_node(i)) + }) + .validate_complete(); assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(4, 4)); assert_eq!(result.errors(), &vec![]); @@ -478,15 +499,16 @@ mod tests { let schema_str = "# Test `name:/[a-zA-Z]+/`"; let input_str = "# Test Wolf"; - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - .goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(is_heading_content_node(s) && is_heading_content_node(i))) - .validate_complete(); + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| { + assert!(is_heading_content_node(s) && is_heading_content_node(i)) + }) + .validate_complete(); 
assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(6, 4)); assert_eq!(result.errors(), &vec![]); @@ -498,15 +520,14 @@ mod tests { let schema_str = "# Heading [test]({a:/a/}) `b:/b/`"; let input_str = "# Heading [test](a) b"; - let result = ValidatorTester::::from_strs( - schema_str, input_str, - ) - .walk() - .goto_first_child_then_unwrap() - .goto_first_child_then_unwrap() - .goto_next_sibling_then_unwrap() - .peek_nodes(|(s, i)| assert!(both_are_textual_containers(s, i))) - .validate_complete(); + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_textual_containers(s, i))) + .validate_complete(); let errors = result.errors().to_vec(); let value = result.value().clone(); @@ -522,11 +543,13 @@ mod tests { #[test] fn test_paragraph_vs_repeated_matcher_paragraph_simple() { let schema_str = r#" -`items:/.*/`{,} +`items`{,} "#; let input_str = r#" foo + bar + buzz "#; @@ -538,14 +561,109 @@ buzz .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) .validate_complete(); - let errors = result.errors().to_vec(); + assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(1, 5)); + assert_eq!(result.errors(), vec![]); + assert_eq!(*result.value(), json!({"items": ["foo", "bar", "buzz"]})); + } + + #[test] + fn test_paragraph_vs_repeated_matcher_paragraph_simple_with_stuff_after() { + let schema_str = r#" +`items`{,} + +# Test +"#; + let input_str = r#" +foo + +bar + +buzz + +# Test +"#; + + let result = ValidatorTester::::from_strs( + schema_str, input_str, + ) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + assert_eq!( + *result.farthest_reached_pos(), + NodePosPair::from_pos(5, 7) // at the subsequent heading + ); + assert_eq!(result.errors(), vec![]); + assert_eq!(*result.value(), json!({"items": 
["foo", "bar", "buzz"]})); + } + + #[test] + fn test_paragraph_vs_repeated_matcher_paragraph_with_italic() { + let schema_str = r#" +`items`{,} +"#; + let input_str = r#" +foo + +bar *italic* + +buzz +"#; + + let result = ValidatorTester::::from_strs( + schema_str, input_str, + ) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + let _errors = result.errors().to_vec(); let value = result.value().clone(); - // assert_eq!( - // *result.farthest_reached_pos(), - // NodePosPair::from_pos(12, 10) - // ); - // assert_eq!(errors, vec![]); - assert_eq!(value, json!({"items": ["foo", "bar", "buzz"]})); + assert_eq!(value, json!({"items": ["foo", "bar *italic*", "buzz"]})); + } + + #[test] + fn test_paragraph_vs_paragraph_with_normal_matcher() { + let schema_str = r#" +`data:/test/` +"#; + let input_str = r#" +test +"#; + + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + // Should have no errors since "test" matches the pattern "^test" + assert_eq!(result.errors(), vec![]); + assert_eq!(*result.value(), json!({"data": "test"})); + } + + #[test] + fn test_paragraph_vs_paragraph_with_normal_matcher_mismatch() { + let schema_str = r#" +`data:/test/` +"#; + let input_str = r#" +foo +"#; + + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + // Should have an error since "foo" doesn't match the pattern "^test" + assert!(!result.errors().is_empty()); } } diff --git a/src/mdschema/validator/node_walker/validators/headings.rs b/src/mdschema/validator/node_walker/validators/headings.rs index d55312f..f710ea6 100644 --- a/src/mdschema/validator/node_walker/validators/headings.rs +++ 
b/src/mdschema/validator/node_walker/validators/headings.rs @@ -10,7 +10,7 @@ use crate::invariant_violation; use crate::mdschema::validator::errors::ValidationError; use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::helpers::compare_node_kinds::compare_node_kinds; -use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; +use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::waiting_at_end; @@ -20,10 +20,11 @@ use crate::mdschema::validator::validator_walker::ValidatorWalker; /// /// Checks that they are the same kind of heading, and and then delegates to /// `TextualContainerVsTextualContainerValidator::validate`. +#[derive(Default)] pub(super) struct HeadingVsHeadingValidator; impl ValidatorImpl for HeadingVsHeadingValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); @@ -99,10 +100,8 @@ impl ValidatorImpl for HeadingVsHeadingValidator { } // Now that we're at the heading content, use `validate_text_vs_text` - TextualContainerVsTextualContainerValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ) + ContainerVsContainerValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof) } } diff --git a/src/mdschema/validator/node_walker/validators/links.rs b/src/mdschema/validator/node_walker/validators/links.rs index 1e9e5d7..25d3dd7 100644 --- a/src/mdschema/validator/node_walker/validators/links.rs +++ 
b/src/mdschema/validator/node_walker/validators/links.rs @@ -24,10 +24,11 @@ use crate::mdschema::validator::validator_walker::ValidatorWalker; use crate::compare_node_kinds_check; /// Validate two link-like nodes (links or images) against each other. +#[derive(Default)] pub(super) struct LinkVsLinkValidator; impl ValidatorImpl for LinkVsLinkValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); diff --git a/src/mdschema/validator/node_walker/validators/lists.rs b/src/mdschema/validator/node_walker/validators/lists.rs index 4c842cb..bce770c 100644 --- a/src/mdschema/validator/node_walker/validators/lists.rs +++ b/src/mdschema/validator/node_walker/validators/lists.rs @@ -11,7 +11,7 @@ use crate::mdschema::validator::{ node_walker::{ ValidationResult, validators::{ - Validator, ValidatorImpl, containers::TextualContainerVsTextualContainerValidator, + Validator, ValidatorImpl, containers::ContainerVsContainerValidator, }, }, ts_types::*, @@ -88,10 +88,11 @@ use crate::compare_node_kinds_check; /// /// Note that a limitation here is that you cannot have a variable-length list /// that is not the final list in your schema. +#[derive(Default)] pub(super) struct ListVsListValidator; impl ValidatorImpl for ListVsListValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); @@ -308,7 +309,7 @@ impl ValidatorImpl for ListVsListValidator { // If there are more items to validate AT THE SAME LEVEL, recurse to // validate them. We now use the *next* schema node too. 
if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { - let next_result = ListVsListValidator::validate( + let next_result = ListVsListValidator::default().validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); @@ -329,7 +330,7 @@ impl ValidatorImpl for ListVsListValidator { schema_cursor.node().kind() ); - let next_result = ListVsListValidator::validate( + let next_result = ListVsListValidator::default().validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); @@ -494,7 +495,7 @@ impl ValidatorImpl for ListVsListValidator { input_cursor.goto_first_child(); schema_cursor.goto_first_child(); - let deeper_result = ListVsListValidator::validate( + let deeper_result = ListVsListValidator::default().validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); @@ -505,7 +506,7 @@ impl ValidatorImpl for ListVsListValidator { // Recurse on next sibling if available! if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { trace!("Moving to next sibling list items for continued validation"); - let new_matches = ListVsListValidator::validate( + let new_matches = ListVsListValidator::default().validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); @@ -605,7 +606,7 @@ fn validate_list_item_contents_vs_list_item_contents( ValidatorWalker::from_cursors(&schema_cursor, schema_str, &input_cursor, input_str); ( - TextualContainerVsTextualContainerValidator::validate(&walker, got_eof), + ContainerVsContainerValidator::default().validate(&walker, got_eof), false, ) } diff --git a/src/mdschema/validator/node_walker/validators/matchers.rs b/src/mdschema/validator/node_walker/validators/matchers.rs index a1cca40..d07ee0d 100644 --- a/src/mdschema/validator/node_walker/validators/matchers.rs +++ b/src/mdschema/validator/node_walker/validators/matchers.rs @@ -4,7 +4,6 @@ //! Types: //! - `MatcherVsTextValidator`: handles pattern matching and capture logic used //! 
when schema nodes embed matcher syntax inside textual content. - use log::trace; use serde_json::json; use tree_sitter::TreeCursor; @@ -27,10 +26,11 @@ use crate::mdschema::validator::validator_walker::ValidatorWalker; use super::textual::validate_textual_vs_textual_direct; +#[derive(Default)] pub(super) struct MatcherVsTextValidator; impl ValidatorImpl for MatcherVsTextValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); @@ -312,7 +312,7 @@ impl ValidatorImpl for MatcherVsTextValidator { } // Delegate to the literal matcher validator - return LiteralMatcherVsTextualValidator::validate( + return LiteralMatcherVsTextualValidator::default().validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); @@ -418,10 +418,11 @@ impl ValidatorImpl for MatcherVsTextValidator { } } +#[derive(Default)] pub(super) struct TextualVsMatcherValidator; impl ValidatorImpl for TextualVsMatcherValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); @@ -587,10 +588,11 @@ impl ValidatorImpl for TextualVsMatcherValidator { } } +#[derive(Default)] pub(super) struct LiteralMatcherVsTextualValidator; impl ValidatorImpl for LiteralMatcherVsTextualValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let schema_cursor: &TreeCursor = walker.schema_cursor(); let input_cursor: &TreeCursor = walker.input_cursor(); let mut result = ValidationResult::from_cursors(schema_cursor, input_cursor); @@ -866,7 +868,7 @@ mod 
tests { let walker = ValidatorWalker::from_cursors(&schema_cursor, schema_str, &input_cursor, input_str); - let result = TextualVsTextualValidator::validate(&walker, true); + let result = TextualVsTextualValidator::default().validate(&walker, true); assert!(result.errors().is_empty()); assert_eq!(result.value(), &json!({"test": "test"})); @@ -882,7 +884,7 @@ mod tests { .goto_first_child_then_unwrap() .validate_complete(); - assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(5, 2)); + assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(4, 2)); assert!(result.errors().is_empty()); assert_eq!(result.value(), &json!({"test": "test"})); } diff --git a/src/mdschema/validator/node_walker/validators/mod.rs b/src/mdschema/validator/node_walker/validators/mod.rs index 2f400e4..3469622 100644 --- a/src/mdschema/validator/node_walker/validators/mod.rs +++ b/src/mdschema/validator/node_walker/validators/mod.rs @@ -33,12 +33,12 @@ pub(super) mod quotes; pub(super) mod tables; pub(super) mod textual; -pub trait ValidatorImpl { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; +pub trait ValidatorImpl: Default { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; } pub trait Validator { - fn validate(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; + fn validate(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; } impl Validator for T { @@ -47,8 +47,8 @@ impl Validator for T { i = %walker.input_cursor().descendant_index(), s = %walker.schema_cursor().descendant_index(), ), ret)] - fn validate(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - Self::validate_impl(walker, got_eof) + fn validate(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + self.validate_impl(walker, got_eof) } } @@ -107,7 +107,7 @@ mod test_utils { input_str: &'a str, } - impl<'a, V: Validator> ValidationTesterWalker<'a, V> { + impl<'a, V: Validator + 
Default> ValidationTesterWalker<'a, V> { pub fn validate(&mut self, got_eof: bool) -> ValidationResult { self.print(); @@ -117,7 +117,7 @@ mod test_utils { &self.input_cursor, self.input_str, ); - V::validate(&walker, got_eof) + V::default().validate(&walker, got_eof) } pub fn validate_complete(&mut self) -> ValidationResult { diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validator/node_walker/validators/nodes.rs index 913f139..a17804c 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs +++ b/src/mdschema/validator/node_walker/validators/nodes.rs @@ -5,12 +5,10 @@ //! based on node kinds and performs shared structural checks. use log::trace; -use crate::mdschema::validator::errors::{SchemaError, ValidationError}; use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::helpers::check_repeating_matchers::check_repeating_matchers; use crate::mdschema::validator::node_walker::validators::code::CodeVsCodeValidator; -use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; +use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; use crate::mdschema::validator::node_walker::validators::headings::HeadingVsHeadingValidator; use crate::mdschema::validator::node_walker::validators::links::LinkVsLinkValidator; use crate::mdschema::validator::node_walker::validators::lists::ListVsListValidator; @@ -30,10 +28,11 @@ use crate::{compare_node_children_lengths_check, compare_node_kinds_check, invar /// - Code blocks -> `CodeVsCodeValidator::validate` /// - Lists -> `ListVsListValidator::validate` /// - Headings/documents -> recursively validate children +#[derive(Default)] pub struct NodeVsNodeValidator; impl ValidatorImpl for NodeVsNodeValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> 
ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { validate_node_vs_node_impl(walker, got_eof) } } @@ -52,75 +51,43 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida if both_are_textual_nodes(&schema_node, &input_node) { trace!("Both are textual nodes, validating text vs text"); - return TextualVsTextualValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return TextualVsTextualValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are codeblock nodes else if both_are_codeblocks(&schema_node, &input_node) { - return CodeVsCodeValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return CodeVsCodeValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } else if both_are_quotes(&schema_node, &input_node) { - return QuoteVsQuoteValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return QuoteVsQuoteValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are tables else if both_are_tables(&schema_node, &input_node) { - return TableVsTableValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return TableVsTableValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are textual containers else if both_are_textual_containers(&schema_node, &input_node) { - // If we have top level textual containers, they CANNOT have repeating - // matchers. `validate_textual_container_vs_textual_container` allows - // the containers to contain repeating matchers since the same utility - // is used for list validation. 
- - if let Some(repeating_matcher_index) = - check_repeating_matchers(&schema_cursor, walker.schema_str()) - { - result.add_error(ValidationError::SchemaError( - SchemaError::RepeatingMatcherInTextContainer { - schema_index: repeating_matcher_index, - }, - )); - return result; - } - - return TextualContainerVsTextualContainerValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return ContainerVsContainerValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are textual nodes else if both_are_textual_nodes(&schema_node, &input_node) { - return TextualVsTextualValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return TextualVsTextualValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are link nodes or image nodes else if both_are_link_nodes(&schema_node, &input_node) || both_are_image_nodes(&schema_node, &input_node) { - return LinkVsLinkValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return LinkVsLinkValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are list nodes else if both_are_list_nodes(&schema_node, &input_node) { - return ListVsListValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return ListVsListValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are ruler nodes else if both_are_rulers(&schema_node, &input_node) { @@ -129,10 +96,8 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida // First, if they are headings, validate the headings themselves. 
trace!("Both are heading nodes, validating heading vs heading"); - let heading_result = HeadingVsHeadingValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let heading_result = HeadingVsHeadingValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&heading_result); @@ -161,10 +126,8 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida input_cursor.goto_first_child(), ) { (true, true) => { - let new_result = NodeVsNodeValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let new_result = NodeVsNodeValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&new_result); result.sync_cursor_pos(&schema_cursor, &input_cursor); } @@ -186,10 +149,8 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida input_cursor.goto_next_sibling(), ) { (true, true) => { - let new_result = NodeVsNodeValidator::validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let new_result = NodeVsNodeValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&new_result); result.sync_cursor_pos(&schema_cursor, &input_cursor); } @@ -242,6 +203,7 @@ mod tests { use crate::mdschema::validator::{ errors::{SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, + ts_types::both_are_paragraphs, }; #[test] @@ -475,4 +437,59 @@ mod tests { assert_eq!(result.errors(), []); assert_eq!(result.value(), &json!({})); } + + #[test] + fn test_node_vs_node_repeated_paragraph_nothing_after() { + let schema_str = r#" +`items`{,} +"#; + let input_str = r#" +foo + +bar + +buzz +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) 
+ .validate_complete(); + + assert_eq!(*result.farthest_reached_pos(), NodePosPair::from_pos(1, 5)); + assert_eq!(result.errors(), vec![]); + assert_eq!(*result.value(), json!({"items": ["foo", "bar", "buzz"]})); + } + + #[test] + fn test_node_vs_node_repeated_paragraph_heading_after() { + let schema_str = r#" +`items`{,} + +# Test +"#; + let input_str = r#" +foo + +bar + +buzz + +# Test +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + assert_eq!( + *result.farthest_reached_pos(), + NodePosPair::from_pos(5, 7) // at the subsequent heading + ); + assert_eq!(result.errors(), vec![]); + assert_eq!(*result.value(), json!({"items": ["foo", "bar", "buzz"]})); + } } diff --git a/src/mdschema/validator/node_walker/validators/quotes.rs b/src/mdschema/validator/node_walker/validators/quotes.rs index bfec545..ff2b30f 100644 --- a/src/mdschema/validator/node_walker/validators/quotes.rs +++ b/src/mdschema/validator/node_walker/validators/quotes.rs @@ -4,7 +4,7 @@ //! - `QuoteVsQuoteValidator`: verifies quote node kinds and delegates content //! validation to textual containers. use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; +use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; use crate::mdschema::validator::validator_walker::ValidatorWalker; use crate::{compare_node_kinds_check, invariant_violation}; @@ -15,11 +15,12 @@ use crate::{compare_node_kinds_check, invariant_violation}; /// 1. Checking that both nodes are block_quote nodes /// 2. Moving into the first child of both schema and input /// 3. 
Delegating to TextualContainerVsTextualContainerValidator for content validation +#[derive(Default)] pub(super) struct QuoteVsQuoteValidator; impl ValidatorImpl for QuoteVsQuoteValidator { #[track_caller] - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); @@ -60,7 +61,7 @@ impl ValidatorImpl for QuoteVsQuoteValidator { } // Delegate to TextualContainerVsTextualContainerValidator for the children - return TextualContainerVsTextualContainerValidator::validate( + return ContainerVsContainerValidator::default().validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 97fcb89..3e01eb7 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -13,7 +13,7 @@ use crate::mdschema::validator::errors::{ }; use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::validators::containers::TextualContainerVsTextualContainerValidator; +use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; #[cfg(feature = "invariant_violations")] use crate::mdschema::validator::ts_types::*; @@ -22,10 +22,11 @@ use crate::mdschema::validator::validator_walker::ValidatorWalker; use tree_sitter::TreeCursor; /// Validate two tables. 
+#[derive(Default)] pub(super) struct TableVsTableValidator; impl ValidatorImpl for TableVsTableValidator { - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { validate_impl(walker, got_eof) } } @@ -148,7 +149,7 @@ fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { // jump to the next sibling pair. If there is no next sibling // pair we are done. 'col_iter: loop { - let cell_result = TextualContainerVsTextualContainerValidator::validate( + let cell_result = ContainerVsContainerValidator::default().validate( &walker.with_cursors(&schema_cursor, &input_cursor), got_eof, ); diff --git a/src/mdschema/validator/node_walker/validators/textual.rs b/src/mdschema/validator/node_walker/validators/textual.rs index c7f134b..03c3eb5 100644 --- a/src/mdschema/validator/node_walker/validators/textual.rs +++ b/src/mdschema/validator/node_walker/validators/textual.rs @@ -26,11 +26,12 @@ use crate::mdschema::validator::{ /// text node and the next node is a `code_span`. If so, delegate to /// `MatcherVsTextValidator::validate`. /// 2. Otherwise, check that the node kind and text contents are the same. 
+#[derive(Default)] pub(super) struct TextualVsTextualValidator; impl ValidatorImpl for TextualVsTextualValidator { #[track_caller] - fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { validate_textual_vs_textual_impl(walker, got_eof) } } @@ -48,7 +49,7 @@ fn validate_textual_vs_textual_impl(walker: &ValidatorWalker, got_eof: bool) -> }; if current_node_is_code_node || current_node_is_text_node_and_next_node_code_node { - return MatcherVsTextValidator::validate(walker, got_eof); + return MatcherVsTextValidator::default().validate(walker, got_eof); } validate_textual_vs_textual_direct( diff --git a/src/mdschema/validator/validator.rs b/src/mdschema/validator/validator.rs index e39fea2..4041a1c 100644 --- a/src/mdschema/validator/validator.rs +++ b/src/mdschema/validator/validator.rs @@ -177,7 +177,7 @@ impl Validator { farthest_reached_pos.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); let walker = ValidatorWalker::new(schema_cursor, &schema_str, input_cursor, &input_str); - NodeVsNodeValidator::validate(&walker, got_eof) + NodeVsNodeValidator::default().validate(&walker, got_eof) }; self.push_validation_result(validation_result); From 72065f9eabe13496ddb32854c727ec03bbe9ac3f Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 18:39:59 -0500 Subject: [PATCH 06/33] finish repeated paragraphs --- Cargo.lock | 171 ++++++++++++----- Cargo.toml | 1 + README.md | 17 ++ .../src/content/docs/matchers/02-matchers.mdx | 172 +++++++++--------- .../node_walker/validators/containers.rs | 127 +++++++++++-- .../validator/node_walker/validators/lists.rs | 33 ++-- .../validator/node_walker/validators/mod.rs | 3 +- 7 files changed, 356 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fde13c8..d342f16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -144,9 +144,9 @@ checksum = 
"175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "cc" -version = "1.2.49" +version = "1.2.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" +checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" dependencies = [ "find-msvc-tools", "shlex", @@ -160,9 +160,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "clap" -version = "4.5.53" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" dependencies = [ "clap_builder", "clap_derive", @@ -170,9 +170,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.53" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" dependencies = [ "anstream", "anstyle", @@ -286,6 +286,72 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "digest" version = "0.10.7" @@ -381,9 +447,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" [[package]] name = "fixedbitset" @@ -473,11 +539,17 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" 
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -502,9 +574,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "json5" @@ -531,9 +603,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.178" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libredox" @@ -579,6 +651,7 @@ dependencies = [ "ariadne", "clap", "colored", + "derive_builder", "env_logger", "envy", "line-col", @@ -710,9 +783,9 @@ checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" [[package]] name = "pest" -version = "2.8.4" +version = "2.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbcfd20a6d4eeba40179f05735784ad32bdaef05ce8e8af05f180d45bb3e7e22" +checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7" dependencies = [ "memchr", "ucd-trie", @@ -720,9 +793,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.8.4" +version = "2.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f72981ade67b1ca6adc26ec221be9f463f2b5839c7508998daa17c23d94d7f" +checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed" dependencies = [ "pest", "pest_generator", @@ -730,9 +803,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.4" +version = "2.8.5" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee9efd8cdb50d719a80088b76f81aec7c41ed6d522ee750178f83883d271625" +checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5" dependencies = [ "pest", "pest_meta", @@ -743,9 +816,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.8.4" +version = "2.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf1d70880e76bdc13ba52eafa6239ce793d85c8e43896507e43dd8984ff05b82" +checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365" dependencies = [ "pest", "sha2", @@ -791,9 +864,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] @@ -815,9 +888,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -892,9 +965,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags", "errno", @@ -903,12 +976,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - [[package]] name = "serde" 
version = "1.0.228" @@ -951,16 +1018,16 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "indexmap", "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -1018,9 +1085,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -1053,9 +1120,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.23.0" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", "getrandom 0.3.4", @@ -1192,9 +1259,9 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tracing" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -1214,9 +1281,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.35" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" +checksum = 
"db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -1547,20 +1614,26 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", "syn", ] + +[[package]] +name = "zmij" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" diff --git a/Cargo.toml b/Cargo.toml index 22daed9..fe69ca5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ paste = "1.0.15" tabled = "0.20.0" mdvalidate-utils = {version = "0.0.1", path = "utils"} thiserror = "2.0.17" +derive_builder = "0.20.2" [dev-dependencies] ptree = "0.5.2" diff --git a/README.md b/README.md index 4473343..30a0a37 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,10 @@ Schema: - `feature:/[A-Za-z][\w -]+/`{2,4} - `detail:/[a-z][\w -]+/`{,2} +## Description + +`description`{2,3} + Inline: `code`! and `bang`!! ```{lang:/\w+/} @@ -66,6 +70,14 @@ Input: - fewer allocations - Safer IO +## Description + +This release focuses on performance improvements and safety enhancements. + +Key changes include optimized memory management and stricter type checking. + +We've also improved error messages throughout the codebase. + Inline: `code` and `bang`! 
```rust @@ -94,6 +106,11 @@ Output: { "build": "7A9F3C1", "checked": "print(\"ok\")", + "description": [ + "This release focuses on performance improvements and safety enhancements.", + "Key changes include optimized memory management and stricter type checking.", + "We've also improved error messages throughout the codebase." + ], "detail": [ "fewer allocations" ], diff --git a/docs/src/content/docs/matchers/02-matchers.mdx b/docs/src/content/docs/matchers/02-matchers.mdx index 3f515ec..90f1395 100644 --- a/docs/src/content/docs/matchers/02-matchers.mdx +++ b/docs/src/content/docs/matchers/02-matchers.mdx @@ -7,42 +7,89 @@ order: 2 import SchemaAndInput from "../../../components/SchemaAndInput.astro"; import TODO from "../../../components/TODO.astro"; -Matchers allow you to validate dynamic content using regular expressions. A matcher is defined using inline code syntax with a specific format: `` `label:/pattern/` ``. +Matchers allow you to validate dynamic content in your Markdown documents. There are two types of matchers: **regex matchers** that match patterns using regular expressions, and **all matchers** that match everything as an identity function. -# Syntax +# Matcher Types -The basic matcher format is: +## Regex Matchers + +A regex matcher is defined using inline code syntax with a specific format: `` `label:/pattern/` `` ``` `label:/regex-pattern/` ``` - **label**: An identifier for the matched value (used in validation output) -- **pattern**: A JavaScript-compatible regular expression +- **pattern**: A regular expression that matches the content The pattern is automatically anchored to the start (as if prefixed with `^`), so it matches from the beginning of the available text. +### Simple Examples + - + + + + +## All Matchers + +All matchers act as an identity function - they **always** match and return exactly what was passed to them. 
If a matcher has no regex pattern (just a label in backticks), it becomes an all matcher that accepts all available content in the current context. + +The syntax is simply `` `label` `` without a regex pattern. + + - + -## With Surrounding Text +All matchers accept any input including special characters, spaces, and other spanning (inline) nodes: -Matchers can be combined with literal text as prefixes and suffixes: + + + + + + +# Matchers with Surrounding Text + +Both regex matchers and all matchers can be combined with literal text as prefixes and suffixes: -This also works for spanning nodes of other types, like italics and subsequent code spans via literal matchers (more on this later). +## Spanning Multiple Node Types + +Matchers can work across different spanning node types, like italics and subsequent code spans: +# Label Naming Rules + +Matcher labels (for both regex matchers and all matchers) must follow these rules: + +- Must contain only alphanumeric characters (a-z, A-Z, 0-9), hyphens (`-`), and underscores (`_`) +- Cannot contain spaces or other special characters +- Valid examples: `user_name`, `item-count`, `id123`, `MyData` +- Invalid examples: `user name` (space), `data@field` (special char), `item.count` (period) + ## Empty Labels @@ -100,15 +158,15 @@ This also works for spanning nodes of other types, like italics and subsequent c To match without capturing a value, use an underscore (`_`) as the label: -## Multiple matchers +# Multiple Matchers -Right now, you can only have one matcher per paragraph (collection of spanning elements). So, for example, the following will not work. +Right now, you can only have one matcher per paragraph (collection of spanning elements). So, for example, the following will not work: +You can validate multiple paragraph nodes into an array by using a repeated matcher. The repeated matcher syntax is `` {min,max} ``, where `min` and `max` are optional. 
-You can validate multiple paragraph nodes into an array by using a repeated matcher. The repeated matcher syntax will show up again for lists, it looks like `` {min,max} ``, where `min` and `max` are optional. +**Important:** Repeating paragraph matchers must be **all matchers** (`` `label` ``), not regex matchers. This is because each paragraph can contain arbitrary content and structure. -The matched content is passed to the executable: -- As arguments (`$0`, `$1`, etc.) -- Via stdin (if command reads from stdin) + # Literal Code Blocks @@ -162,63 +222,7 @@ To match inline code blocks literally instead of treating them as matchers, add valid={false} /> -# Everything Matchers - -If a matcher has no regex pattern (just a label in backticks), it will match everything available in the current context as an identity function. The syntax is simply `` `label` ``, where the label follows the naming rules below. - -## Naming Rules - -Matcher labels (both for regex matchers and everything matchers) must follow these rules: - -- Must contain only alphanumeric characters (a-z, A-Z, 0-9), hyphens (`-`), and underscores (`_`) -- Cannot contain spaces or other special characters -- Valid examples: `user_name`, `item-count`, `id123`, `MyData` -- Invalid examples: `user name` (space), `data@field` (special char), `item.count` (period) - -## Behavior - -Everything matchers act as an identity function - they **always** match and return exactly what was passed to them, including special characters, spaces, and any other content: - - - - - - - -When used in a paragraph context, everything matchers will capture all spanning elements including formatting like italics: - - - - - -Everything matchers never fail to match - they accept any input and return it unchanged under the specified label. 
- -### Escaping the Exclamation Mark +## Escaping the Exclamation Mark @@ -237,7 +241,7 @@ Use `!!` to match a literal exclamation mark after code: You can validate content by running an executable. Use the syntax `label:!command`: Self { - Self { - allow_repeating: true, - } - } -} - impl ValidatorImpl for ContainerVsContainerValidator { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = @@ -104,6 +97,19 @@ impl ValidatorImpl for ContainerVsContainerValidator { .validate(walker, got_eof); } + if !self.allow_repeating { + if let Some(repeating_matcher_index) = + check_repeating_matchers(&schema_cursor, walker.schema_str()) + { + result.add_error(ValidationError::SchemaError( + SchemaError::RepeatingMatcherInTextContainer { + schema_index: repeating_matcher_index, + }, + )); + return result; + } + } + match count_non_literal_matchers_in_children(&schema_cursor, walker.schema_str()) { Ok(non_literal_matchers_in_children) if non_literal_matchers_in_children > 1 && got_eof => @@ -296,7 +302,21 @@ impl ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { } if matches.len() < extras.min_items().unwrap_or(0) { - todo!("Too few items for repeating matcher"); + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // That's ok. We may get them later. 
+ return result; + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::WrongListCount { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + min: extras.min_items(), + max: extras.max_items(), + actual: matches.len(), + }, + )); + return result; + } } input_cursor.goto_next_sibling(); @@ -666,4 +686,81 @@ foo // Should have an error since "foo" doesn't match the pattern "^test" assert!(!result.errors().is_empty()); } + + #[test] + fn test_paragraph_vs_paragraph_with_min() { + let schema_str = r#" +`data`{2,} +"#; + let input_str = r#" +test +"#; + + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + assert_eq!( + result.errors(), + vec![ValidationError::SchemaViolation( + SchemaViolationError::WrongListCount { + schema_index: 2, + input_index: 1, + min: Some(2), + max: None, + actual: 1 + } + )] + ); + assert_eq!(*result.value(), json!({})); + } + + #[test] + fn test_paragraph_vs_paragraph_with_min_incomplete() { + let schema_str = r#" +`data`{2,} +"#; + let input_str = r#" +test +"#; + + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_incomplete(); + + // no errors yet since incomplete. 
we don't have too many, we have too + // few, so we may get them later + assert_eq!(result.errors(), vec![]); + assert_eq!(*result.value(), json!({})); // no matches yet + } + + #[test] + fn test_paragraph_vs_paragraph_with_max() { + let schema_str = r#" +`data`{,2} +"#; + let input_str = r#" +test + +foo + +bar +"#; + + let result = + ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_paragraphs(s, i))) + .validate_complete(); + + assert_eq!(result.errors(), vec![]); // stops yoinking after the max + assert_eq!(*result.value(), json!({"data": ["test", "foo"]})); + } } diff --git a/src/mdschema/validator/node_walker/validators/lists.rs b/src/mdschema/validator/node_walker/validators/lists.rs index bce770c..d289733 100644 --- a/src/mdschema/validator/node_walker/validators/lists.rs +++ b/src/mdschema/validator/node_walker/validators/lists.rs @@ -11,7 +11,8 @@ use crate::mdschema::validator::{ node_walker::{ ValidationResult, validators::{ - Validator, ValidatorImpl, containers::ContainerVsContainerValidator, + Validator, ValidatorImpl, + containers::{ContainerVsContainerValidatorBuilder}, }, }, ts_types::*, @@ -309,10 +310,8 @@ impl ValidatorImpl for ListVsListValidator { // If there are more items to validate AT THE SAME LEVEL, recurse to // validate them. We now use the *next* schema node too. 
if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { - let next_result = ListVsListValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let next_result = ListVsListValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&next_result); } @@ -330,10 +329,8 @@ impl ValidatorImpl for ListVsListValidator { schema_cursor.node().kind() ); - let next_result = ListVsListValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let next_result = ListVsListValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); // We need to be able to capture errors that happen in the recursive call result.join_errors(next_result.errors()); values_at_level.push(next_result.value().clone()); @@ -495,10 +492,8 @@ impl ValidatorImpl for ListVsListValidator { input_cursor.goto_first_child(); schema_cursor.goto_first_child(); - let deeper_result = ListVsListValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let deeper_result = ListVsListValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&deeper_result); } } @@ -506,10 +501,8 @@ impl ValidatorImpl for ListVsListValidator { // Recurse on next sibling if available! 
if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { trace!("Moving to next sibling list items for continued validation"); - let new_matches = ListVsListValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let new_matches = ListVsListValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&new_matches); } else { trace!("No more sibling pairs found, validation complete"); @@ -606,7 +599,11 @@ fn validate_list_item_contents_vs_list_item_contents( ValidatorWalker::from_cursors(&schema_cursor, schema_str, &input_cursor, input_str); ( - ContainerVsContainerValidator::default().validate(&walker, got_eof), + ContainerVsContainerValidatorBuilder::default() + .allow_repeating(true) + .build() + .unwrap() + .validate(&walker, got_eof), false, ) } diff --git a/src/mdschema/validator/node_walker/validators/mod.rs b/src/mdschema/validator/node_walker/validators/mod.rs index 3469622..e617458 100644 --- a/src/mdschema/validator/node_walker/validators/mod.rs +++ b/src/mdschema/validator/node_walker/validators/mod.rs @@ -36,8 +36,7 @@ pub(super) mod textual; pub trait ValidatorImpl: Default { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; } - -pub trait Validator { +pub trait Validator: Default { fn validate(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; } From 70627f38f53f010c942c3f37c72be671754b2771 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 18:56:55 -0500 Subject: [PATCH 07/33] start work on repeated tables --- .../node_walker/validators/tables.rs | 167 +++++++++++++----- src/mdschema/validator/ts_types.rs | 6 + 2 files changed, 133 insertions(+), 40 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 3e01eb7..6751ef2 100644 --- 
a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -11,14 +11,15 @@ use crate::invariant_violation; use crate::mdschema::validator::errors::{ MalformedStructureKind, SchemaViolationError, ValidationError, }; +use crate::mdschema::validator::matcher::matcher_extras::MatcherExtras; use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; -#[cfg(feature = "invariant_violations")] use crate::mdschema::validator::ts_types::*; -use crate::mdschema::validator::ts_utils::waiting_at_end; +use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; use crate::mdschema::validator::validator_walker::ValidatorWalker; +use log::trace; use tree_sitter::TreeCursor; /// Validate two tables. @@ -149,10 +150,8 @@ fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { // jump to the next sibling pair. If there is no next sibling // pair we are done. 
'col_iter: loop { - let cell_result = ContainerVsContainerValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + let cell_result = ContainerVsContainerValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&cell_result); match ( @@ -224,40 +223,64 @@ fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { result } -// #[derive(Debug, Clone, PartialEq, Eq, Error)] -// pub enum MDTableError { -// #[error("Column count mismatch: expected {expected}, got {got}")] -// ColumnCountMismatch { expected: usize, got: usize }, -// } - -// struct MDTable { -// columns: usize, -// rows: Vec>>, -// } - -// impl MDTable { -// pub fn new(column_count: usize) -> Self { -// MDTable { -// columns: column_count, -// rows: Vec::new(), -// } -// } - -// pub fn add_row(&mut self, row: Vec>) -> Result<(), MDTableError> { -// if row.len() != self.columns { -// return Err(MDTableError::ColumnCountMismatch { -// expected: self.columns, -// got: row.len(), -// }); -// } -// self.rows.push(row); -// Ok(()) -// } - -// pub fn iter_rows(&self) -> impl Iterator>> { -// self.rows.iter() -// } -// } +/// We say that a row is repeated if there is a repeater directly after the row. +/// +/// Example: +/// ```markdown +/// |c1|c2| +/// |-|-| +/// |r1|r2|{1,2} (a row like this row can appear 1-2 times) +/// ``` +fn try_get_repeated_row_bounds( + schema_cursor: &TreeCursor, + schema_str: &str, +) -> Option<(Option, Option)> { + #[cfg(feature = "invariant_violations")] + if !is_table_data_row_node(&schema_cursor.node()) { + invariant_violation!( + "is_repeated_row only works for data row nodes. Title row nodes cannot be repeated. Got {:?}", + schema_cursor.node().kind() + ) + } + + let mut schema_cursor = schema_cursor.clone(); + + if !schema_cursor.goto_first_child() { + // If there are no children then we can't be a repeated row. 
+ + return None; + } + + #[cfg(feature = "invariant_violations")] + if !is_table_cell_node(&schema_cursor.node()) { + invariant_violation!("at this point we should be at a table cell") + } + + // Go to the last sibling + while schema_cursor.goto_next_sibling() {} + + if schema_cursor.goto_first_child() && is_text_node(&schema_cursor.node()) { + let node_str = get_node_text(&schema_cursor.node(), schema_str); + + match MatcherExtras::try_from_extras_str(node_str) { + Ok(extras) if extras.had_min_max() => Some((extras.min_items(), extras.max_items())), + Ok(extras) => { + trace!("Got non-repeating extras: {:?}", extras); + + None + } + Err(error) => { + trace!("Error parsing matcher extras: {:?}", error); + + None + } + } + } else { + trace!("Unexpected node kind: {:?}", schema_cursor.node().kind()); + + None + } +} #[cfg(test)] mod tests { @@ -266,9 +289,73 @@ mod tests { use crate::mdschema::validator::{ errors::{NodeContentMismatchKind, SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, + ts_utils::parse_markdown, }; use serde_json::json; + #[test] + fn test_is_repeated_row_is_repeated() { + let schema_str = r#" +|c1|c2| +|-|-| +|r1|r2|{1,2} +"#; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); // document -> table + schema_cursor.goto_first_child(); // table -> header row + schema_cursor.goto_next_sibling(); // header row -> delimiter row + schema_cursor.goto_next_sibling(); // delimiter row -> data row + assert!(is_table_data_row_node(&schema_cursor.node())); + + assert_eq!( + try_get_repeated_row_bounds(&schema_cursor, schema_str).unwrap(), + (Some(1), Some(2)) + ) + } + + #[test] + fn test_is_repeated_row_is_repeated_broken() { + let schema_str = r#" +|c1|c2| +|-|-| +|r1|r2|{1,2 +"#; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); // document -> table + 
schema_cursor.goto_first_child(); // table -> header row + schema_cursor.goto_next_sibling(); // header row -> delimiter row + schema_cursor.goto_next_sibling(); // delimiter row -> data row + assert!(is_table_data_row_node(&schema_cursor.node())); + + assert_eq!( + try_get_repeated_row_bounds(&schema_cursor, schema_str), + None + ) + } + + #[test] + fn test_is_repeated_row_is_not_repeated() { + let schema_str = r#" +|c1|c2| +|-|-| +|r1|r2| +"#; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); // document -> table + schema_cursor.goto_first_child(); // table -> header row + schema_cursor.goto_next_sibling(); // header row -> delimiter row + schema_cursor.goto_next_sibling(); // delimiter row -> data row + assert!(is_table_data_row_node(&schema_cursor.node())); + + assert_eq!( + try_get_repeated_row_bounds(&schema_cursor, schema_str), + None + ) + } + #[test] fn test_validate_table_vs_table_simple_literal() { let schema_str = r#" diff --git a/src/mdschema/validator/ts_types.rs b/src/mdschema/validator/ts_types.rs index f617d36..fe1bcc2 100644 --- a/src/mdschema/validator/ts_types.rs +++ b/src/mdschema/validator/ts_types.rs @@ -132,6 +132,12 @@ node_kind_pair!( "Check if both nodes are tables.", ["table"] ); +node_kind_pair!( + is_table_data_row_node, + both_are_table_data_rows, + "Check if both nodes are table data rows.", + ["table_data_row"] +); node_kind_pair!( is_table_cell_node, both_are_table_cells, From 33581c1b4cca347a11f0609d6c7da78ca967e200 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 19:20:54 -0500 Subject: [PATCH 08/33] more progress for repeated tables --- .../node_walker/validators/tables.rs | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 6751ef2..0336f75 100644 --- 
a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -7,7 +7,6 @@ // use std::rc::Rc; // use thiserror::Error; -use crate::invariant_violation; use crate::mdschema::validator::errors::{ MalformedStructureKind, SchemaViolationError, ValidationError, }; @@ -17,9 +16,11 @@ use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; use crate::mdschema::validator::ts_types::*; -use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; +use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end, walk_to_root}; use crate::mdschema::validator::validator_walker::ValidatorWalker; +use crate::{invariant_violation, trace_cursors}; use log::trace; +use mdvalidate_utils::PrettyPrint; use tree_sitter::TreeCursor; /// Validate two tables. @@ -145,6 +146,14 @@ fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { ), } + // First check if we are dealing with a special case -- repeated rows! + if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) + && let Some(_bounds) = + try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) + { + todo!() + } + // we are at the first cell initially. we validate the first // cell in the input vs the first cell in the schema, and then // jump to the next sibling pair. 
If there is no next sibling @@ -243,6 +252,20 @@ fn try_get_repeated_row_bounds( ) } + // If we have a table like: + // + // |c1|c2| + // |-|-| + // |r1{1,2}|{2,}| + // + // We don't want to lock onto the {2,} + let full_row_str = get_node_text(&schema_cursor.node(), schema_str); + // We are guaranteed there will be a cell at the very end that could be a + // correct repeater if the cell does not end with "|" or ":" + if full_row_str.ends_with(|c| c == '|' || c == ':') { + return None; + } + let mut schema_cursor = schema_cursor.clone(); if !schema_cursor.goto_first_child() { @@ -335,6 +358,27 @@ mod tests { ) } + #[test] + fn test_is_repeated_row_is_not_repeated_bounds_in_wrong_place() { + let schema_str = r#" +|c1|c2| +|-|-| +|r1{21,5}|{2,}| +"#; + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); // document -> table + schema_cursor.goto_first_child(); // table -> header row + schema_cursor.goto_next_sibling(); // header row -> delimiter row + schema_cursor.goto_next_sibling(); // delimiter row -> data row + assert!(is_table_data_row_node(&schema_cursor.node())); + + assert_eq!( + try_get_repeated_row_bounds(&schema_cursor, schema_str), + None + ) + } + #[test] fn test_is_repeated_row_is_not_repeated() { let schema_str = r#" From 0e74be54f72309f9eb172b41e6edfbefabaa0998 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 19:22:56 -0500 Subject: [PATCH 09/33] reorganize --- .../node_walker/validators/tables.rs | 360 +++++++++--------- 1 file changed, 177 insertions(+), 183 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 0336f75..4696845 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -3,10 +3,7 @@ //! Types: //! 
- `TableVsTableValidator`: validates table structure (rows, headers, cells) //! and delegates cell content checks to textual container validation. -// use std::os::raw::c_short; -// use std::rc::Rc; -// use thiserror::Error; - +use crate::invariant_violation; use crate::mdschema::validator::errors::{ MalformedStructureKind, SchemaViolationError, ValidationError, }; @@ -16,11 +13,9 @@ use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; use crate::mdschema::validator::ts_types::*; -use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end, walk_to_root}; +use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; use crate::mdschema::validator::validator_walker::ValidatorWalker; -use crate::{invariant_violation, trace_cursors}; use log::trace; -use mdvalidate_utils::PrettyPrint; use tree_sitter::TreeCursor; /// Validate two tables. @@ -29,207 +24,206 @@ pub(super) struct TableVsTableValidator; impl ValidatorImpl for TableVsTableValidator { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - validate_impl(walker, got_eof) - } -} + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); -/// Returns true if we should early return with an error (result was modified). 
-fn goto_next_sibling_pair_or_exit<'a>( - schema_cursor: &TreeCursor<'a>, - input_cursor: &TreeCursor<'a>, - walker: &ValidatorWalker, - got_eof: bool, - result: &mut ValidationResult, -) -> bool { - if !waiting_at_end(got_eof, walker.input_str(), input_cursor) { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::MalformedNodeStructure { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - kind: MalformedStructureKind::MismatchingTableCells, - }, - )); - true - } else { - false - } -} - -fn validate_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - let mut schema_cursor = walker.schema_cursor().clone(); - let mut input_cursor = walker.input_cursor().clone(); + let mut result = ValidationResult::from_cursors(&schema_cursor, &input_cursor); + let need_to_restart_result = result.clone(); - let mut result = ValidationResult::from_cursors(&schema_cursor, &input_cursor); - let need_to_restart_result = result.clone(); + // Both should be at tables already + #[cfg(feature = "invariant_violations")] + if !both_are_tables(&schema_cursor.node(), &input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "we should already be at table nodes" + ) + } - // Both should be at tables already - #[cfg(feature = "invariant_violations")] - if !both_are_tables(&schema_cursor.node(), &input_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "we should already be at table nodes" - ) - } + if !schema_cursor.goto_first_child() || !input_cursor.goto_first_child() { + #[cfg(feature = "invariant_violations")] + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "we should be able to dive down one layer into a table" + ) + } - if !schema_cursor.goto_first_child() || !input_cursor.goto_first_child() { #[cfg(feature = "invariant_violations")] - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "we 
should be able to dive down one layer into a table" - ) - } - - #[cfg(feature = "invariant_violations")] - if !both_are_table_headers(&schema_cursor.node(), &input_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "the immediate child of all tables should be table header" - ) - } + if !both_are_table_headers(&schema_cursor.node(), &input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "the immediate child of all tables should be table header" + ) + } - result.sync_cursor_pos(&schema_cursor, &input_cursor); - - // (document[0]0..41) - // └─ (table[1]1..40) - // ├─ (table_header_row[2]1..22) <-- we are iterating over these in the outer loop - // │ ├─ (table_cell[3]2..16) <-- we are iterating over these in the inner loop - // │ │ └─ (text[4]2..16) - // │ └─ (table_cell[5]17..21) - // │ └─ (text[6]17..21) - // ├─ (table_delimiter_row[7]23..28) - // │ ├─ (table_column_alignment[8]24..25) - // │ └─ (table_column_alignment[9]26..27) - // └─ (table_data_row[10]29..40) - // ├─ (table_cell[11]30..34) - // │ └─ (text[12]30..34) - // └─ (table_cell[13]35..39) - // └─ (text[14]35..39) - - // General idea: For each row, walk down to the first child, iterate over all its siblings, - // hop back to the row container, go to the next row, until there are no rows left. 
- - 'row_iter: loop { - { - // Dive in to the first row, iterate over children, hop back (hop - // back is automatic since we use different cursors in the context) + result.sync_cursor_pos(&schema_cursor, &input_cursor); + + // (document[0]0..41) + // └─ (table[1]1..40) + // ├─ (table_header_row[2]1..22) <-- we are iterating over these in the outer loop + // │ ├─ (table_cell[3]2..16) <-- we are iterating over these in the inner loop + // │ │ └─ (text[4]2..16) + // │ └─ (table_cell[5]17..21) + // │ └─ (text[6]17..21) + // ├─ (table_delimiter_row[7]23..28) + // │ ├─ (table_column_alignment[8]24..25) + // │ └─ (table_column_alignment[9]26..27) + // └─ (table_data_row[10]29..40) + // ├─ (table_cell[11]30..34) + // │ └─ (text[12]30..34) + // └─ (table_cell[13]35..39) + // └─ (text[14]35..39) + + // General idea: For each row, walk down to the first child, iterate over all its siblings, + // hop back to the row container, go to the next row, until there are no rows left. + + 'row_iter: loop { { - let mut schema_cursor = schema_cursor.clone(); - let mut input_cursor = input_cursor.clone(); + // Dive in to the first row, iterate over children, hop back (hop + // back is automatic since we use different cursors in the context) + { + let mut schema_cursor = schema_cursor.clone(); + let mut input_cursor = input_cursor.clone(); - match ( - schema_cursor.goto_first_child(), - input_cursor.goto_first_child(), - ) { - (true, true) => { - #[cfg(feature = "invariant_violations")] - if !both_are_table_cells(&schema_cursor.node(), &input_cursor.node()) { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "the immediate child of table headers should be a table cell" - ) + match ( + schema_cursor.goto_first_child(), + input_cursor.goto_first_child(), + ) { + (true, true) => { + #[cfg(feature = "invariant_violations")] + if !both_are_table_cells(&schema_cursor.node(), &input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "the 
immediate child of table headers should be a table cell" + ) + } } + (false, false) => break 'row_iter, + _ => invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "table is malformed in a way that should be impossible" + ), } - (false, false) => break 'row_iter, - _ => invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "table is malformed in a way that should be impossible" - ), - } - // First check if we are dealing with a special case -- repeated rows! - if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) - && let Some(_bounds) = - try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) - { - todo!() - } - - // we are at the first cell initially. we validate the first - // cell in the input vs the first cell in the schema, and then - // jump to the next sibling pair. If there is no next sibling - // pair we are done. - 'col_iter: loop { - let cell_result = ContainerVsContainerValidator::default() - .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); - result.join_other_result(&cell_result); + // First check if we are dealing with a special case -- repeated rows! + if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) + && let Some(_bounds) = + try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) + { + todo!() + } - match ( - schema_cursor.goto_next_sibling(), - input_cursor.goto_next_sibling(), - ) { - (true, true) => {} - (false, false) => break 'col_iter, - (true, false) => { - if goto_next_sibling_pair_or_exit( - &schema_cursor, - &input_cursor, - walker, - got_eof, - &mut result, - ) { - return result; - } else { - return need_to_restart_result; + // we are at the first cell initially. we validate the first + // cell in the input vs the first cell in the schema, and then + // jump to the next sibling pair. If there is no next sibling + // pair we are done. 
+ 'col_iter: loop { + let cell_result = ContainerVsContainerValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); + result.join_other_result(&cell_result); + + match ( + schema_cursor.goto_next_sibling(), + input_cursor.goto_next_sibling(), + ) { + (true, true) => {} + (false, false) => break 'col_iter, + (true, false) => { + if goto_next_sibling_pair_or_exit( + &schema_cursor, + &input_cursor, + walker, + got_eof, + &mut result, + ) { + return result; + } else { + return need_to_restart_result; + } } + (false, true) => {} } - (false, true) => {} } } } - } - 'wait_for_row: loop { - match ( - schema_cursor.goto_next_sibling(), - input_cursor.goto_next_sibling(), - ) { - (true, true) => { - result.keep_farther_pos(&NodePosPair::from_cursors( - &schema_cursor, - &input_cursor, - )); - - if !both_are_table_delimiter_rows(&schema_cursor.node(), &input_cursor.node()) { - break 'wait_for_row; + 'wait_for_row: loop { + match ( + schema_cursor.goto_next_sibling(), + input_cursor.goto_next_sibling(), + ) { + (true, true) => { + result.keep_farther_pos(&NodePosPair::from_cursors( + &schema_cursor, + &input_cursor, + )); + + if !both_are_table_delimiter_rows( + &schema_cursor.node(), + &input_cursor.node(), + ) { + break 'wait_for_row; + } } - } - (false, false) => break 'row_iter, - (true, false) => { - if goto_next_sibling_pair_or_exit( - &schema_cursor, - &input_cursor, - walker, - got_eof, - &mut result, - ) { - return result; - } else { - return need_to_restart_result; + (false, false) => break 'row_iter, + (true, false) => { + if goto_next_sibling_pair_or_exit( + &schema_cursor, + &input_cursor, + walker, + got_eof, + &mut result, + ) { + return result; + } else { + return need_to_restart_result; + } + } + _ => { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "table is malformed in a way that should be impossible" + ) } - } - _ => { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - 
"table is malformed in a way that should be impossible" - ) } } } + + result } +} - result +/// Returns true if we should early return with an error (result was modified). +fn goto_next_sibling_pair_or_exit<'a>( + schema_cursor: &TreeCursor<'a>, + input_cursor: &TreeCursor<'a>, + walker: &ValidatorWalker, + got_eof: bool, + result: &mut ValidationResult, +) -> bool { + if !waiting_at_end(got_eof, walker.input_str(), input_cursor) { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::MismatchingTableCells, + }, + )); + true + } else { + false + } } /// We say that a row is repeated if there is a repeater directly after the row. From e45b101209e24394f69bca2dd1be768494a678a0 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 20:24:52 -0500 Subject: [PATCH 10/33] set up outline --- .../node_walker/validators/containers.rs | 14 +- .../node_walker/validators/tables.rs | 306 +++++++++++++++++- 2 files changed, 308 insertions(+), 12 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index ceb1e28..2ab1d54 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -93,7 +93,7 @@ impl ValidatorImpl for ContainerVsContainerValidator { ); if is_repeated_matcher_paragraph(&schema_cursor, walker.schema_str()) { - return ParagraphVsRepeatedMatcherParagraphValidator::default() + return RepeatedMatcherParagraphVsParagraphValidator::default() .validate(walker, got_eof); } @@ -236,9 +236,9 @@ impl ValidatorImpl for ContainerVsContainerValidator { /// } /// ``` #[derive(Default)] -pub(super) struct ParagraphVsRepeatedMatcherParagraphValidator; +pub(super) struct RepeatedMatcherParagraphVsParagraphValidator; -impl 
ValidatorImpl for ParagraphVsRepeatedMatcherParagraphValidator { +impl ValidatorImpl for RepeatedMatcherParagraphVsParagraphValidator { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), &walker.input_cursor()); @@ -405,7 +405,7 @@ mod tests { errors::{SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, node_walker::validators::{ - containers::ParagraphVsRepeatedMatcherParagraphValidator, test_utils::ValidatorTester, + containers::RepeatedMatcherParagraphVsParagraphValidator, test_utils::ValidatorTester, }, ts_types::*, ts_utils::parse_markdown, @@ -573,7 +573,7 @@ bar buzz "#; - let result = ValidatorTester::::from_strs( + let result = ValidatorTester::::from_strs( schema_str, input_str, ) .walk() @@ -603,7 +603,7 @@ buzz # Test "#; - let result = ValidatorTester::::from_strs( + let result = ValidatorTester::::from_strs( schema_str, input_str, ) .walk() @@ -632,7 +632,7 @@ bar *italic* buzz "#; - let result = ValidatorTester::::from_strs( + let result = ValidatorTester::::from_strs( schema_str, input_str, ) .walk() diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 4696845..0ce571a 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -5,8 +5,9 @@ //! and delegates cell content checks to textual container validation. 
use crate::invariant_violation; use crate::mdschema::validator::errors::{ - MalformedStructureKind, SchemaViolationError, ValidationError, + MalformedStructureKind, NodeContentMismatchKind, SchemaViolationError, ValidationError, }; +use crate::mdschema::validator::matcher::matcher::Matcher; use crate::mdschema::validator::matcher::matcher_extras::MatcherExtras; use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; @@ -16,6 +17,7 @@ use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; use crate::mdschema::validator::validator_walker::ValidatorWalker; use log::trace; +use regex::bytes::Match; use tree_sitter::TreeCursor; /// Validate two tables. @@ -116,10 +118,13 @@ impl ValidatorImpl for TableVsTableValidator { // First check if we are dealing with a special case -- repeated rows! if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) - && let Some(_bounds) = + && let Some(bounds) = try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) { - todo!() + let repeated_row_result = RepeatedRowVsRowValidator::new(bounds) + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); + repeated_row_result + .walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); } // we are at the first cell initially. we validate the first @@ -205,6 +210,8 @@ impl ValidatorImpl for TableVsTableValidator { } /// Returns true if we should early return with an error (result was modified). +/// +/// This is for the case where the input has a child but the schema does not. 
fn goto_next_sibling_pair_or_exit<'a>( schema_cursor: &TreeCursor<'a>, input_cursor: &TreeCursor<'a>, @@ -226,6 +233,212 @@ fn goto_next_sibling_pair_or_exit<'a>( } } +#[derive(Default)] +pub(super) struct RepeatedRowVsRowValidator { + bounds: (Option, Option), +} + +impl RepeatedRowVsRowValidator { + pub fn new(bounds: (Option, Option)) -> Self { + Self { bounds } + } +} + +impl ValidatorImpl for RepeatedRowVsRowValidator { + fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { + let mut schema_cursor = walker.schema_cursor().clone(); + let mut input_cursor = walker.input_cursor().clone(); + + let mut result = ValidationResult::from_cursors(&schema_cursor, &input_cursor); + let need_to_restart_result = result.clone(); + + #[cfg(feature = "invariant_violations")] + if !both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) { + invariant_violation!( + result, + &schema_cursor, + &input_cursor, + "is_repeated_row only works for data row nodes. Title row nodes cannot be repeated. 
Got {:?}", + schema_cursor.node().kind() + ) + } + + // A version of the schema cursor where it is pointed at the first cell in the (repeating) row + let schema_cursor_at_first_cell = get_cursor_at_first_cell(&schema_cursor); + + let max_bound = self.bounds.1.unwrap_or(usize::MAX); + + let corresponding_matchers = + get_cell_indexes_that_have_simple_matcher(&schema_cursor, walker.schema_str()); + + let corresponding_matchers_only_matchers: Vec<&Matcher> = corresponding_matchers + .iter() + .filter_map(|n| n.as_ref()) + .collect(); + let num_corresponding_matchers = corresponding_matchers_only_matchers.len(); + + let mut all_matches: Vec> = vec![Vec::new(); num_corresponding_matchers]; + + 'row_iter: for _ in 0..max_bound { + let mut schema_cursor_at_first_cell = schema_cursor_at_first_cell.clone(); + + // Validate the entire row + { + let mut input_cursor_at_first_cell = get_cursor_at_first_cell(&input_cursor); + + let mut matcher_num = 0; + 'col_iter: for i in 0.. { + let cell_str = get_node_text(&input_cursor.node(), walker.input_str()); + + match corresponding_matchers.get(i).unwrap() { + Some(matcher) => match matcher.match_str(cell_str) { + Some(captured_str) => { + all_matches + .get_mut(matcher_num) + .unwrap() // we pre filled it properly ahead of time + .push(captured_str.to_string()); + + matcher_num += 1; + } + None => { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor_at_first_cell + .descendant_index(), + input_index: input_cursor_at_first_cell.descendant_index(), + expected: matcher.pattern().to_string(), + actual: cell_str.into(), + kind: NodeContentMismatchKind::Matcher, + }, + )); + + return result; + } + }, + None => { + // Validate the cell as a normal container. 
+ let cell_result = ContainerVsContainerValidator::default().validate( + &walker.with_cursors(&schema_cursor, &input_cursor), + got_eof, + ); + result.join_data(cell_result.data()); + if cell_result.has_errors() { + result.join_errors(cell_result.errors()); + return result; + } + } + } + + match ( + schema_cursor_at_first_cell.goto_next_sibling(), + input_cursor_at_first_cell.goto_next_sibling(), + ) { + (true, true) => {} + (false, false) => break 'col_iter, + (true, false) => { + if goto_next_sibling_pair_or_exit( + &schema_cursor, + &input_cursor, + walker, + got_eof, + &mut result, + ) { + return result; + } else { + return need_to_restart_result; + } + } + (false, true) => {} + } + } + } + + // Move the input to the next row (the schema stays put!) + if input_cursor.goto_next_sibling() { + // continue! + // TODO: should we check bounds? + } else { + break 'row_iter; + } + } + + // TODO: bound checking + // let min_bound = self.bounds.0.unwrap_or(0); + + for (matches, matcher) in all_matches.iter().zip(corresponding_matchers_only_matchers) { + if let Some(key) = matcher.id() { + result.set_match(key, matches.clone().into()); + } + } + + result + } +} + +/// For each cell, check if it is a single simple matcher. If it is, load a +/// Some(Matcher) with that matcher into a Vec, and if it is not, load a None +/// instead. +/// +/// # Arguments +/// +/// * `schema_cursor` - A cursor pointing to the first cell in the repeating schema row. +/// * `schema_str` - The string representation of the schema. +/// +/// # Returns +/// +/// A vector of `Some`s of the cells that have a single matcher. 
+fn get_cell_indexes_that_have_simple_matcher( + schema_cursor: &TreeCursor, + schema_str: &str, +) -> Vec> { + #[cfg(feature = "invariant_violations")] + if !is_table_cell_node(&schema_cursor.node()) { + invariant_violation!("we should start at the first cell in the repeating row in the table",) + } + + let mut schema_cursor = schema_cursor.clone(); + + let mut indexes = Vec::new(); + + loop { + // For it to be a "simple" matcher with nothing else, it must ONLY have + // a single child, which is a code node. + let single_child_that_is_code = schema_cursor.node().child_count() == 1 + && is_inline_code_node(&schema_cursor.node().child(0).unwrap()); + + if single_child_that_is_code { + if let Ok(matcher) = Matcher::try_from_schema_cursor(&schema_cursor, schema_str) { + indexes.push(Some(matcher)); + } else { + indexes.push(None); + } + } else { + indexes.push(None); + } + + if schema_cursor.goto_next_sibling() { + // continue! + } else { + break; + } + } + + indexes +} + +/// Walk down to the first node, and debug assert that it is a table cell. +fn get_cursor_at_first_cell<'a>(cursor: &TreeCursor<'a>) -> TreeCursor<'a> { + let mut cursor = cursor.clone(); + cursor.goto_first_child(); + + #[cfg(feature = "invariant_violations")] + if !is_table_cell_node(&cursor.node()) { + invariant_violation!("the descendant of the cursor here should be a table cell",) + } + + cursor +} + /// We say that a row is repeated if there is a repeater directly after the row. 
/// /// Example: @@ -310,6 +523,37 @@ mod tests { }; use serde_json::json; + #[test] + fn get_cell_indexes_that_have_simple_matcher_simple() { + // just has one matcher + let schema_str = r#" +|c1|c2|c3|c4|c5| +|-|-|-|-|-| +|r1|`foo:/test/`|`bar:/test2/`|not a matcher|`baz:/test3/`| + "#; + + let schema_tree = parse_markdown(schema_str).unwrap(); + let mut schema_cursor = schema_tree.walk(); + schema_cursor.goto_first_child(); // document -> table + schema_cursor.goto_first_child(); // table -> header row + schema_cursor.goto_next_sibling(); // header row -> delimiter row + schema_cursor.goto_next_sibling(); // delimiter row -> data row + assert!(is_table_data_row_node(&schema_cursor.node())); + schema_cursor.goto_first_child(); // data row -> table cell + assert!(is_table_cell_node(&schema_cursor.node())); + + assert_eq!( + get_cell_indexes_that_have_simple_matcher(&schema_cursor, schema_str), + vec![ + None, + Some(Matcher::try_from_pattern_and_suffix_str("`foo:/test/`", None).unwrap()), + Some(Matcher::try_from_pattern_and_suffix_str("`bar:/test2/`", None).unwrap()), + None, + Some(Matcher::try_from_pattern_and_suffix_str("`baz:/test3/`", None).unwrap()) + ] + ) + } + #[test] fn test_is_repeated_row_is_repeated() { let schema_str = r#" @@ -526,7 +770,7 @@ mod tests { } )] ); - assert_eq!(result.value(), &json!({})); + assert_eq!(*result.value(), json!({})); } #[test] @@ -549,6 +793,58 @@ mod tests { .validate_complete(); assert_eq!(result.errors(), vec![]); - assert_eq!(result.value(), &json!({"c1": "buzz"})); + assert_eq!(*result.value(), json!({"c1": "buzz"})); + } + + #[test] + fn test_validate_table_vs_table_with_repeated_cell() { + let schema_str = r#" +|c2|c2| +|-|-| +|`a:/.*/`|`b:/.*/`|{,} + "#; + let input_str = r#" +|c2|c2| +|-|-| +|a1|b1| +|a2|b2| +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_tables(s, i))) + .validate_complete(); + + 
assert_eq!(result.errors(), vec![]); + assert_eq!( + *result.value(), + json!({"a": ["a1", "a2"], "b": ["b1", "b2"]}) + ); + } + + #[test] + #[ignore] + fn test_validate_table_vs_table_with_repeated_cell_and_mismatch() { + let schema_str = r#" +|c2|c2| +|-|-| +|`a:/.*/`|`b:/xx/`|{,} + "#; + let input_str = r#" +|c2|c2| +|-|-| +|a1|b1| +|a2|b2| +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_tables(s, i))) + .validate_complete(); + + assert_eq!(result.errors().len(), 1); + todo!("check specific type of error"); } } From cb2ab128aaab4b40e593b1da84f845de540ea349 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 20:28:50 -0500 Subject: [PATCH 11/33] set up more tests --- .../node_walker/validators/tables.rs | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 0ce571a..5ed4639 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -797,6 +797,37 @@ mod tests { } #[test] + fn test_validate_repeated_row_vs_row_simple() { + let schema_str = r#" +|c2|c2| +|-|-| +|`a:/.*/`|`b:/.*/`|{,} +"#; + let input_str = r#" +|c2|c2| +|-|-| +|a1|b1| +|a2|b2| +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() // document -> table + .goto_first_child_then_unwrap() // table -> header row + .goto_next_sibling_then_unwrap() // header row -> delimiter row + .goto_next_sibling_then_unwrap() // delimiter row -> data row + .peek_nodes(|(s, i)| assert!(both_are_table_data_rows(s, i))) + .validate_complete(); + + assert_eq!(result.errors(), vec![]); + assert_eq!( + *result.value(), + json!({"a": ["a1", "a2"], "b": ["b1", "b2"]}) + ); + } + + #[test] + #[ignore] fn 
test_validate_table_vs_table_with_repeated_cell() { let schema_str = r#" |c2|c2| From 48e0b97a8b2cc3ec91307b858918c5804482ea01 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 21:06:22 -0500 Subject: [PATCH 12/33] change example and more progress on repeating matchers --- examples/cli/input.md | 24 ++++++++++-- examples/cli/schema.md | 21 +++++++++- src/mdschema/validator/matcher/matcher.rs | 24 ++++++++---- .../validator/node_walker/validators/lists.rs | 5 +-- .../node_walker/validators/matchers.rs | 6 +-- .../validator/node_walker/validators/mod.rs | 33 +++++++++++----- .../validator/node_walker/validators/nodes.rs | 2 +- .../node_walker/validators/quotes.rs | 6 +-- .../node_walker/validators/tables.rs | 38 ++++++++++++------- 9 files changed, 112 insertions(+), 47 deletions(-) diff --git a/examples/cli/input.md b/examples/cli/input.md index 839fe4a..79113d3 100644 --- a/examples/cli/input.md +++ b/examples/cli/input.md @@ -1,5 +1,23 @@ -test +# Has 1 to 3 paragraphs -test +test1 -test +test2 + +test3 + +|c2|c2| +|-|-| +|`a:/.*/`|`b:/.*/`|{,} + +--- + +Output is + +```json +{ +"num": "1", +"test": ["test1", "test2", "test3"] +"a": ["a1", "a2"], "b": ["b1", "b2"] +} +``` diff --git a/examples/cli/schema.md b/examples/cli/schema.md index 442d3b8..9f9e039 100644 --- a/examples/cli/schema.md +++ b/examples/cli/schema.md @@ -1 +1,20 @@ -`test:/test/`{,} +# Has `num:/d/` to 3 paragraphs + +`test:/test\d/`{1,3} + +|c2|c2| +|-|-| +|a1|b1| +|a2|b2| + +--- + +Output is + +```json +{ +"num": "1", +"test": ["test1", "test2", "test3"] +"a": ["a1", "a2"], "b": ["b1", "b2"] +} +``` diff --git a/src/mdschema/validator/matcher/matcher.rs b/src/mdschema/validator/matcher/matcher.rs index c5f71b0..3dc05ed 100644 --- a/src/mdschema/validator/matcher/matcher.rs +++ b/src/mdschema/validator/matcher/matcher.rs @@ -14,8 +14,9 @@ use crate::mdschema::validator::{ static ID_PATTERN: LazyLock = LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9-_]+$").unwrap()); -static 
REGEX_MATCHER_PATTERN: LazyLock = - LazyLock::new(|| Regex::new(r"^(?:(?P[a-zA-Z0-9-_]+):)?(?:\/(?P.+?)\/|(?P[a-zA-Z0-9-_]+))$").unwrap()); +static REGEX_MATCHER_PATTERN: LazyLock = LazyLock::new(|| { + Regex::new(r"^(?:(?P[a-zA-Z0-9-_]+):)?(?:\/(?P.+?)\/|(?P[a-zA-Z0-9-_]+))$").unwrap() +}); static RANGE_PATTERN: LazyLock = LazyLock::new(|| Regex::new(r"\{(\d*),(\d*)\}").unwrap()); @@ -315,7 +316,9 @@ fn extract_id_and_pattern( } // Otherwise, we have a regex pattern (e.g., `id:/regex/` or `/regex/`) - let id = captures.name("id_with_regex").map(|m| m.as_str().to_string()); + let id = captures + .name("id_with_regex") + .map(|m| m.as_str().to_string()); let regex_pattern = captures .name("regex") .map(|m| m.as_str().to_string()) @@ -327,9 +330,10 @@ fn extract_id_and_pattern( })?; // Create a regex matcher from the pattern - let matcher = MatcherKind::from_regex(Regex::new(&format!("^{}", regex_pattern)).map_err(|e| { - MatcherError::MatcherInteriorRegexInvalid(format!("Invalid regex pattern: {}", e)) - })?); + let matcher = + MatcherKind::from_regex(Regex::new(&format!("^{}", regex_pattern)).map_err(|e| { + MatcherError::MatcherInteriorRegexInvalid(format!("Invalid regex pattern: {}", e)) + })?); Ok((id, matcher)) } @@ -469,13 +473,17 @@ mod tests { // Matches everything including spaces and special characters assert_eq!(matcher.match_str("valid-later"), Some("valid-later")); assert_eq!(matcher.match_str("test@symbol"), Some("test@symbol")); - assert_eq!(matcher.match_str("anything at all!"), Some("anything at all!")); + assert_eq!( + matcher.match_str("anything at all!"), + Some("anything at all!") + ); } #[test] fn test_matcher_invalid_pattern() { // Test error handling for truly invalid pattern (invalid chars for ID, not a regex) - let result = Matcher::try_from_pattern_and_suffix_str("`invalid pattern with spaces`", None); + let result = + Matcher::try_from_pattern_and_suffix_str("`invalid pattern with spaces`", None); assert!(result.is_err()); match 
result.as_ref().unwrap_err() { MatcherError::MatcherInteriorRegexInvalid(_) => { diff --git a/src/mdschema/validator/node_walker/validators/lists.rs b/src/mdschema/validator/node_walker/validators/lists.rs index d289733..2e9d3d0 100644 --- a/src/mdschema/validator/node_walker/validators/lists.rs +++ b/src/mdschema/validator/node_walker/validators/lists.rs @@ -10,10 +10,7 @@ use crate::mdschema::validator::{ matcher::matcher::{Matcher, MatcherError}, node_walker::{ ValidationResult, - validators::{ - Validator, ValidatorImpl, - containers::{ContainerVsContainerValidatorBuilder}, - }, + validators::{Validator, ValidatorImpl, containers::ContainerVsContainerValidatorBuilder}, }, ts_types::*, ts_utils::{ diff --git a/src/mdschema/validator/node_walker/validators/matchers.rs b/src/mdschema/validator/node_walker/validators/matchers.rs index d07ee0d..d2bd55e 100644 --- a/src/mdschema/validator/node_walker/validators/matchers.rs +++ b/src/mdschema/validator/node_walker/validators/matchers.rs @@ -312,10 +312,8 @@ impl ValidatorImpl for MatcherVsTextValidator { } // Delegate to the literal matcher validator - return LiteralMatcherVsTextualValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return LiteralMatcherVsTextualValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } _ => result.add_error(ValidationError::SchemaError(SchemaError::MatcherError { error, diff --git a/src/mdschema/validator/node_walker/validators/mod.rs b/src/mdschema/validator/node_walker/validators/mod.rs index e617458..7aed4bc 100644 --- a/src/mdschema/validator/node_walker/validators/mod.rs +++ b/src/mdschema/validator/node_walker/validators/mod.rs @@ -33,10 +33,10 @@ pub(super) mod quotes; pub(super) mod tables; pub(super) mod textual; -pub trait ValidatorImpl: Default { +pub trait ValidatorImpl { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; } -pub trait Validator: 
Default { +pub trait Validator { fn validate(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult; } @@ -63,7 +63,7 @@ mod test_utils { use super::*; pub struct ValidatorTester<'a, V: Validator> { - _phantom: std::marker::PhantomData, + validator: V, schema_tree: Tree, schema_str: &'a str, input_tree: Tree, @@ -71,12 +71,12 @@ mod test_utils { } impl<'a, V: Validator> ValidatorTester<'a, V> { - pub fn from_strs(schema_str: &'a str, input_str: &'a str) -> Self { + pub fn with_validator(schema_str: &'a str, input_str: &'a str, validator: V) -> Self { let schema_tree = parse_markdown(schema_str).unwrap(); let input_tree = parse_markdown(input_str).unwrap(); Self { - _phantom: std::marker::PhantomData, + validator, schema_tree, schema_str, input_tree, @@ -84,12 +84,27 @@ mod test_utils { } } + pub fn from_strs_and_validator( + schema_str: &'a str, + input_str: &'a str, + validator: V, + ) -> Self { + Self::with_validator(schema_str, input_str, validator) + } + + pub fn from_strs(schema_str: &'a str, input_str: &'a str) -> Self + where + V: Default, + { + Self::with_validator(schema_str, input_str, V::default()) + } + pub fn walk(&'_ self) -> ValidationTesterWalker<'_, V> { let schema_cursor = self.schema_tree.walk(); let input_cursor = self.input_tree.walk(); ValidationTesterWalker { - _phantom: std::marker::PhantomData, + validator: &self.validator, schema_cursor, schema_str: self.schema_str, input_cursor, @@ -99,14 +114,14 @@ mod test_utils { } pub struct ValidationTesterWalker<'a, V: Validator> { - _phantom: std::marker::PhantomData, + validator: &'a V, schema_cursor: TreeCursor<'a>, schema_str: &'a str, input_cursor: TreeCursor<'a>, input_str: &'a str, } - impl<'a, V: Validator + Default> ValidationTesterWalker<'a, V> { + impl<'a, V: Validator> ValidationTesterWalker<'a, V> { pub fn validate(&mut self, got_eof: bool) -> ValidationResult { self.print(); @@ -116,7 +131,7 @@ mod test_utils { &self.input_cursor, self.input_str, ); - 
V::default().validate(&walker, got_eof) + self.validator.validate(&walker, got_eof) } pub fn validate_complete(&mut self) -> ValidationResult { diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validator/node_walker/validators/nodes.rs index a17804c..dff3eba 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs +++ b/src/mdschema/validator/node_walker/validators/nodes.rs @@ -116,7 +116,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida trace!("Both are heading nodes or document nodes. Recursing into sibling pairs."); // Since we're dealing with top level nodes it is our responsibility to ensure that they have the same number of children. - compare_node_children_lengths_check!(schema_cursor, input_cursor, got_eof, result); + // compare_node_children_lengths_check!(schema_cursor, input_cursor, got_eof, result); let parent_pos = NodePosPair::from_cursors(&schema_cursor, &input_cursor); diff --git a/src/mdschema/validator/node_walker/validators/quotes.rs b/src/mdschema/validator/node_walker/validators/quotes.rs index ff2b30f..b3b66b8 100644 --- a/src/mdschema/validator/node_walker/validators/quotes.rs +++ b/src/mdschema/validator/node_walker/validators/quotes.rs @@ -61,10 +61,8 @@ impl ValidatorImpl for QuoteVsQuoteValidator { } // Delegate to TextualContainerVsTextualContainerValidator for the children - return ContainerVsContainerValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); + return ContainerVsContainerValidator::default() + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } } diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 5ed4639..32b0616 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -121,7 +121,7 @@ impl ValidatorImpl for 
TableVsTableValidator { && let Some(bounds) = try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) { - let repeated_row_result = RepeatedRowVsRowValidator::new(bounds) + let repeated_row_result = RepeatedRowVsRowValidator::from_bounds(bounds) .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); repeated_row_result .walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); @@ -233,13 +233,12 @@ fn goto_next_sibling_pair_or_exit<'a>( } } -#[derive(Default)] pub(super) struct RepeatedRowVsRowValidator { bounds: (Option, Option), } impl RepeatedRowVsRowValidator { - pub fn new(bounds: (Option, Option)) -> Self { + pub fn from_bounds(bounds: (Option, Option)) -> Self { Self { bounds } } } @@ -268,8 +267,17 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { let max_bound = self.bounds.1.unwrap_or(usize::MAX); - let corresponding_matchers = - get_cell_indexes_that_have_simple_matcher(&schema_cursor, walker.schema_str()); + let corresponding_matchers = { + let mut schema_cursor = schema_cursor.clone(); + let had_first_child = schema_cursor.goto_first_child(); + + #[cfg(feature = "invariant_violations")] + if !had_first_child { + invariant_violation!("should have had first child") + } + + get_cell_indexes_that_have_simple_matcher(&schema_cursor, walker.schema_str()) + }; let corresponding_matchers_only_matchers: Vec<&Matcher> = corresponding_matchers .iter() @@ -810,14 +818,18 @@ mod tests { |a2|b2| "#; - let result = ValidatorTester::::from_strs(schema_str, input_str) - .walk() - .goto_first_child_then_unwrap() // document -> table - .goto_first_child_then_unwrap() // table -> header row - .goto_next_sibling_then_unwrap() // header row -> delimiter row - .goto_next_sibling_then_unwrap() // delimiter row -> data row - .peek_nodes(|(s, i)| assert!(both_are_table_data_rows(s, i))) - .validate_complete(); + let result = ValidatorTester::from_strs_and_validator( + schema_str, + input_str, + RepeatedRowVsRowValidator::from_bounds((None, 
None)), + ) + .walk() + .goto_first_child_then_unwrap() // document -> table + .goto_first_child_then_unwrap() // table -> header row + .goto_next_sibling_then_unwrap() // header row -> delimiter row + .goto_next_sibling_then_unwrap() // delimiter row -> data row + .peek_nodes(|(s, i)| assert!(both_are_table_data_rows(s, i))) + .validate_complete(); assert_eq!(result.errors(), vec![]); assert_eq!( From 51dfeaf7b7cee0801be8d24504d7b9cf056ad074 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 22:37:13 -0500 Subject: [PATCH 13/33] fix up old tests --- src/mdschema/validator/errors.rs | 2 + .../validator/node_walker/validators/nodes.rs | 86 +++++++++++++++---- .../node_walker/validators/tables.rs | 30 ++----- src/mdschema/validator/validator.rs | 14 ++- tests/misc.rs | 9 +- tests/rulers.rs | 11 +-- 6 files changed, 91 insertions(+), 61 deletions(-) diff --git a/src/mdschema/validator/errors.rs b/src/mdschema/validator/errors.rs index c435375..e814216 100644 --- a/src/mdschema/validator/errors.rs +++ b/src/mdschema/validator/errors.rs @@ -446,6 +446,8 @@ pub enum MalformedStructureKind { MissingListItemContent, HadExtraListItem, MismatchingTableCells, + SchemaHasChildInputDoesnt, + InputHasChildSchemaDoesnt, } impl fmt::Display for SchemaViolationError { diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validator/node_walker/validators/nodes.rs index dff3eba..f31aa6a 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs +++ b/src/mdschema/validator/node_walker/validators/nodes.rs @@ -5,6 +5,9 @@ //! based on node kinds and performs shared structural checks. 
use log::trace; +use crate::mdschema::validator::errors::{ + MalformedStructureKind, SchemaViolationError, ValidationError, +}; use crate::mdschema::validator::node_pos_pair::NodePosPair; use crate::mdschema::validator::node_walker::ValidationResult; use crate::mdschema::validator::node_walker::validators::code::CodeVsCodeValidator; @@ -19,7 +22,7 @@ use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorIm use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::waiting_at_end; use crate::mdschema::validator::validator_walker::ValidatorWalker; -use crate::{compare_node_children_lengths_check, compare_node_kinds_check, invariant_violation}; +use crate::{compare_node_kinds_check, invariant_violation}; /// Validate two arbitrary nodes against each other. /// @@ -136,8 +139,33 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida result.set_farthest_reached_pos(parent_pos); return result; } - (true, false) => todo!(), - (false, true) => todo!(), + (false, true) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::InputHasChildSchemaDoesnt, + }, + )); + } + } + (true, false) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! 
+ } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::SchemaHasChildInputDoesnt, + }, + )); + } + return result; + } (false, false) => { return result; // nothing left } @@ -159,8 +187,33 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida result.set_farthest_reached_pos(parent_pos); return result; } - (true, false) => todo!(), - (false, true) => todo!(), + (false, true) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::InputHasChildSchemaDoesnt, + }, + )); + } + } + (true, false) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! 
+ } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::SchemaHasChildInputDoesnt, + }, + )); + } + return result; + } (false, false) => break, } } @@ -201,7 +254,7 @@ mod tests { use super::super::test_utils::ValidatorTester; use super::NodeVsNodeValidator; use crate::mdschema::validator::{ - errors::{SchemaViolationError, ValidationError}, + errors::{MalformedStructureKind, SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, ts_types::both_are_paragraphs, }; @@ -389,19 +442,16 @@ mod tests { .walk() .validate_complete(); - assert_ne!(result.errors(), []); - - match result.errors().first() { - Some(error) => match error { - ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { expected, .. }, - ) => { - assert_eq!(expected.0, 0, "expected should be 0 for empty schema"); + assert_eq!( + result.errors(), + &vec![ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: 0, + input_index: 1, + kind: MalformedStructureKind::InputHasChildSchemaDoesnt, } - _ => panic!("Expected ChildrenLengthMismatch error, got: {:?}", error), - }, - None => panic!("Expected error"), - } + )] + ); } #[test] diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 32b0616..dbb3820 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -296,7 +296,8 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { let mut matcher_num = 0; 'col_iter: for i in 0.. 
{ - let cell_str = get_node_text(&input_cursor.node(), walker.input_str()); + let cell_str = + get_node_text(&input_cursor_at_first_cell.node(), walker.input_str()); match corresponding_matchers.get(i).unwrap() { Some(matcher) => match matcher.match_str(cell_str) { @@ -337,35 +338,14 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { } } - match ( - schema_cursor_at_first_cell.goto_next_sibling(), - input_cursor_at_first_cell.goto_next_sibling(), - ) { - (true, true) => {} - (false, false) => break 'col_iter, - (true, false) => { - if goto_next_sibling_pair_or_exit( - &schema_cursor, - &input_cursor, - walker, - got_eof, - &mut result, - ) { - return result; - } else { - return need_to_restart_result; - } - } - (false, true) => {} + if !input_cursor_at_first_cell.goto_next_sibling() { + break 'col_iter; } } } // Move the input to the next row (the schema stays put!) - if input_cursor.goto_next_sibling() { - // continue! - // TODO: should we check bounds? - } else { + if !input_cursor.goto_next_sibling() { break 'row_iter; } } diff --git a/src/mdschema/validator/validator.rs b/src/mdschema/validator/validator.rs index 4041a1c..1c56606 100644 --- a/src/mdschema/validator/validator.rs +++ b/src/mdschema/validator/validator.rs @@ -429,17 +429,15 @@ fooobar assert_eq!(errors.len(), 1); match &errors[0] { - ValidationError::SchemaViolation(SchemaViolationError::ChildrenLengthMismatch { + ValidationError::SchemaViolation(SchemaViolationError::MalformedNodeStructure { schema_index, - input_index: _, - expected, - actual, + input_index, + kind, }) => { - assert_eq!(*expected, 3); - assert_eq!(*actual, 2); - assert_eq!(*schema_index, 0); + assert_eq!(*schema_index, 7); + assert_eq!(*input_index, 5); } - _ => panic!("Expected ChildrenLengthMismatch error, got {:?}", errors[0]), + _ => panic!("Expected MalformedNodeStructure error, got {:?}", errors[0]), } } diff --git a/tests/misc.rs b/tests/misc.rs index 746ba4e..e734665 100644 --- a/tests/misc.rs +++ b/tests/misc.rs @@ 
-3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{SchemaViolationError, ValidationError}; +use mdvalidate::mdschema::validator::errors::{MalformedStructureKind, SchemaViolationError, ValidationError}; test_case!( node_heading_and_paragraph, @@ -27,11 +27,10 @@ test_case!( r#"# Hi"#, json!({}), vec![ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { + SchemaViolationError::MalformedNodeStructure { schema_index: 0, - input_index: 0, - expected: 0.into(), - actual: 1, + input_index: 1, + kind: MalformedStructureKind::InputHasChildSchemaDoesnt, } )] ); diff --git a/tests/rulers.rs b/tests/rulers.rs index 744c06d..be1e826 100644 --- a/tests/rulers.rs +++ b/tests/rulers.rs @@ -3,7 +3,9 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{SchemaViolationError, ValidationError}; +use mdvalidate::mdschema::validator::errors::{ + MalformedStructureKind, SchemaViolationError, ValidationError, +}; test_case!(ruler_dashes, r#"---"#, r#"---"#, json!({}), vec![]); @@ -13,11 +15,10 @@ test_case!( r#""#, json!({}), vec![ValidationError::SchemaViolation( - SchemaViolationError::ChildrenLengthMismatch { - schema_index: 0, + SchemaViolationError::MalformedNodeStructure { + schema_index: 1, input_index: 0, - expected: 1.into(), - actual: 0, + kind: MalformedStructureKind::SchemaHasChildInputDoesnt, } )] ); From 4566b310c0d0ba7160271cd3705c9737c248550f Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 23:14:42 -0500 Subject: [PATCH 14/33] fix positioning --- .../validator/node_walker/validators/nodes.rs | 5 + .../node_walker/validators/tables.rs | 134 +++++++++--------- 2 files changed, 72 insertions(+), 67 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validator/node_walker/validators/nodes.rs index f31aa6a..408bcad 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs 
+++ b/src/mdschema/validator/node_walker/validators/nodes.rs @@ -42,6 +42,7 @@ impl ValidatorImpl for NodeVsNodeValidator { fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); + let need_to_restart_result = result.clone(); let schema_node = walker.schema_cursor().node(); let input_node = walker.input_cursor().node(); @@ -142,6 +143,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida (false, true) => { if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { // okay, we'll just wait! + return need_to_restart_result; } else { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::MalformedNodeStructure { @@ -155,6 +157,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida (true, false) => { if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { // okay, we'll just wait! + return need_to_restart_result; } else { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::MalformedNodeStructure { @@ -190,6 +193,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida (false, true) => { if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { // okay, we'll just wait! + return need_to_restart_result; } else { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::MalformedNodeStructure { @@ -203,6 +207,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida (true, false) => { if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { // okay, we'll just wait! 
+ return need_to_restart_result; } else { result.add_error(ValidationError::SchemaViolation( SchemaViolationError::MalformedNodeStructure { diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index dbb3820..9b88970 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -3,7 +3,6 @@ //! Types: //! - `TableVsTableValidator`: validates table structure (rows, headers, cells) //! and delegates cell content checks to textual container validation. -use crate::invariant_violation; use crate::mdschema::validator::errors::{ MalformedStructureKind, NodeContentMismatchKind, SchemaViolationError, ValidationError, }; @@ -16,8 +15,8 @@ use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorIm use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; use crate::mdschema::validator::validator_walker::ValidatorWalker; +use crate::{invariant_violation, trace_cursors}; use log::trace; -use regex::bytes::Match; use tree_sitter::TreeCursor; /// Validate two tables. @@ -92,6 +91,21 @@ impl ValidatorImpl for TableVsTableValidator { let mut schema_cursor = schema_cursor.clone(); let mut input_cursor = input_cursor.clone(); + // First check if we are dealing with a special case -- repeated rows! 
+ trace_cursors!(schema_cursor, input_cursor); + if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) + && let Some(bounds) = + try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) + { + // trace_cursors!(schema_cursor, input_cursor); + let repeated_row_result = RepeatedRowVsRowValidator::from_bounds(bounds) + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); + repeated_row_result + .walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); + dbg!(repeated_row_result); + trace_cursors!(schema_cursor, input_cursor); + } + match ( schema_cursor.goto_first_child(), input_cursor.goto_first_child(), @@ -116,17 +130,6 @@ impl ValidatorImpl for TableVsTableValidator { ), } - // First check if we are dealing with a special case -- repeated rows! - if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) - && let Some(bounds) = - try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) - { - let repeated_row_result = RepeatedRowVsRowValidator::from_bounds(bounds) - .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); - repeated_row_result - .walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); - } - // we are at the first cell initially. we validate the first // cell in the input vs the first cell in the schema, and then // jump to the next sibling pair. If there is no next sibling @@ -142,20 +145,34 @@ impl ValidatorImpl for TableVsTableValidator { ) { (true, true) => {} (false, false) => break 'col_iter, - (true, false) => { - if goto_next_sibling_pair_or_exit( - &schema_cursor, - &input_cursor, - walker, - got_eof, - &mut result, - ) { - return result; + (false, true) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! 
} else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::InputHasChildSchemaDoesnt, + }, + )); + } + } + (true, false) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! return need_to_restart_result; + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::SchemaHasChildInputDoesnt, + }, + )); } + return result; } - (false, true) => {} } } } @@ -180,26 +197,34 @@ impl ValidatorImpl for TableVsTableValidator { } } (false, false) => break 'row_iter, - (true, false) => { - if goto_next_sibling_pair_or_exit( - &schema_cursor, - &input_cursor, - walker, - got_eof, - &mut result, - ) { - return result; - } else { + (false, true) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! return need_to_restart_result; + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::InputHasChildSchemaDoesnt, + }, + )); } } - _ => { - invariant_violation!( - result, - &schema_cursor, - &input_cursor, - "table is malformed in a way that should be impossible" - ) + (true, false) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! 
+ return need_to_restart_result; + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::SchemaHasChildInputDoesnt, + }, + )); + } + return result; } } } @@ -209,30 +234,6 @@ impl ValidatorImpl for TableVsTableValidator { } } -/// Returns true if we should early return with an error (result was modified). -/// -/// This is for the case where the input has a child but the schema does not. -fn goto_next_sibling_pair_or_exit<'a>( - schema_cursor: &TreeCursor<'a>, - input_cursor: &TreeCursor<'a>, - walker: &ValidatorWalker, - got_eof: bool, - result: &mut ValidationResult, -) -> bool { - if !waiting_at_end(got_eof, walker.input_str(), input_cursor) { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::MalformedNodeStructure { - schema_index: schema_cursor.descendant_index(), - input_index: input_cursor.descendant_index(), - kind: MalformedStructureKind::MismatchingTableCells, - }, - )); - true - } else { - false - } -} - pub(super) struct RepeatedRowVsRowValidator { bounds: (Option, Option), } @@ -245,7 +246,7 @@ impl RepeatedRowVsRowValidator { impl ValidatorImpl for RepeatedRowVsRowValidator { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { - let mut schema_cursor = walker.schema_cursor().clone(); + let schema_cursor = walker.schema_cursor().clone(); let mut input_cursor = walker.input_cursor().clone(); let mut result = ValidationResult::from_cursors(&schema_cursor, &input_cursor); @@ -288,7 +289,7 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { let mut all_matches: Vec> = vec![Vec::new(); num_corresponding_matchers]; 'row_iter: for _ in 0..max_bound { - let mut schema_cursor_at_first_cell = schema_cursor_at_first_cell.clone(); + let schema_cursor_at_first_cell = schema_cursor_at_first_cell.clone(); // Validate the 
entire row { @@ -819,7 +820,6 @@ mod tests { } #[test] - #[ignore] fn test_validate_table_vs_table_with_repeated_cell() { let schema_str = r#" |c2|c2| From 0f510b5ca2cdd66214a63482d12aee0aa805e38c Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 23:29:09 -0500 Subject: [PATCH 15/33] fix repeating tables --- .../node_walker/validators/tables.rs | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 9b88970..416b602 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -84,28 +84,29 @@ impl ValidatorImpl for TableVsTableValidator { // hop back to the row container, go to the next row, until there are no rows left. 'row_iter: loop { + // First check if we are dealing with a special case -- repeated rows! + trace_cursors!(schema_cursor, input_cursor); + if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) + && let Some(bounds) = + try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) { + // Process the repeated rows using the main cursors + let repeated_row_result = RepeatedRowVsRowValidator::from_bounds(bounds) + .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); + result.join_other_result(&repeated_row_result); + + // Update the cursors to where the repeated row validator left them + // The schema cursor stays at the repeating row, input cursor advanced past all matched rows + repeated_row_result.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); + + // Now continue to advance both cursors to the next row + } else { // Dive in to the first row, iterate over children, hop back (hop // back is automatic since we use different cursors in the context) { let mut schema_cursor = schema_cursor.clone(); let mut input_cursor = input_cursor.clone(); - // First check 
if we are dealing with a special case -- repeated rows! - trace_cursors!(schema_cursor, input_cursor); - if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) - && let Some(bounds) = - try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) - { - // trace_cursors!(schema_cursor, input_cursor); - let repeated_row_result = RepeatedRowVsRowValidator::from_bounds(bounds) - .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); - repeated_row_result - .walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); - dbg!(repeated_row_result); - trace_cursors!(schema_cursor, input_cursor); - } - match ( schema_cursor.goto_first_child(), input_cursor.goto_first_child(), @@ -250,7 +251,7 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { let mut input_cursor = walker.input_cursor().clone(); let mut result = ValidationResult::from_cursors(&schema_cursor, &input_cursor); - let need_to_restart_result = result.clone(); + let _need_to_restart_result = result.clone(); #[cfg(feature = "invariant_violations")] if !both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) { @@ -360,6 +361,11 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { } } + // Update the result to reflect where we ended up: + // - schema_cursor stays at the repeating row definition + // - input_cursor has advanced past all matched rows + result.sync_cursor_pos(&schema_cursor, &input_cursor); + result } } From 93f753af3860a83d4f9f225234ba254cf9bed383 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 23:37:21 -0500 Subject: [PATCH 16/33] fix another test --- .../node_walker/validators/tables.rs | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 416b602..1fec4b2 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ 
b/src/mdschema/validator/node_walker/validators/tables.rs @@ -95,6 +95,11 @@ impl ValidatorImpl for TableVsTableValidator { .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&repeated_row_result); + // If there were errors in the repeated row validation, return immediately + if repeated_row_result.has_errors() { + return result; + } + // Update the cursors to where the repeated row validator left them // The schema cursor stays at the repeating row, input cursor advanced past all matched rows repeated_row_result.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); @@ -853,7 +858,6 @@ mod tests { } #[test] - #[ignore] fn test_validate_table_vs_table_with_repeated_cell_and_mismatch() { let schema_str = r#" |c2|c2| @@ -863,7 +867,7 @@ mod tests { let input_str = r#" |c2|c2| |-|-| -|a1|b1| +|a1|xx| |a2|b2| "#; @@ -874,6 +878,17 @@ mod tests { .validate_complete(); assert_eq!(result.errors().len(), 1); - todo!("check specific type of error"); + assert_eq!( + result.errors(), + vec![ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: 11, + input_index: 18, + expected: "^xx".to_string(), + actual: "b2".to_string(), + kind: NodeContentMismatchKind::Matcher, + } + )] + ); } } From db6f60cc4a37043e6ce0c605da66425d07400673 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Fri, 9 Jan 2026 23:54:59 -0500 Subject: [PATCH 17/33] fix schema node walking --- .../node_walker/validators/tables.rs | 94 +++++++++++++++++-- 1 file changed, 88 insertions(+), 6 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 1fec4b2..ba2f366 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -252,7 +252,7 @@ impl RepeatedRowVsRowValidator { impl ValidatorImpl for RepeatedRowVsRowValidator { fn validate_impl(&self, walker: 
&ValidatorWalker, got_eof: bool) -> ValidationResult { - let schema_cursor = walker.schema_cursor().clone(); + let mut schema_cursor = walker.schema_cursor().clone(); let mut input_cursor = walker.input_cursor().clone(); let mut result = ValidationResult::from_cursors(&schema_cursor, &input_cursor); @@ -295,8 +295,6 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { let mut all_matches: Vec> = vec![Vec::new(); num_corresponding_matchers]; 'row_iter: for _ in 0..max_bound { - let schema_cursor_at_first_cell = schema_cursor_at_first_cell.clone(); - // Validate the entire row { let mut input_cursor_at_first_cell = get_cursor_at_first_cell(&input_cursor); @@ -357,9 +355,6 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { } } - // TODO: bound checking - // let min_bound = self.bounds.0.unwrap_or(0); - for (matches, matcher) in all_matches.iter().zip(corresponding_matchers_only_matchers) { if let Some(key) = matcher.id() { result.set_match(key, matches.clone().into()); @@ -369,6 +364,7 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { // Update the result to reflect where we ended up: // - schema_cursor stays at the repeating row definition // - input_cursor has advanced past all matched rows + schema_cursor.goto_next_sibling(); result.sync_cursor_pos(&schema_cursor, &input_cursor); result @@ -891,4 +887,90 @@ mod tests { )] ); } + + #[test] + fn test_validate_table_vs_table_with_repeated_cell_max_bound() { + let schema_str = r#" +|c2|c2| +|-|-| +|`a:/.*/`|`b:/.*/`|{,2} + "#; + let input_str = r#" +|c2|c2| +|-|-| +|a1|b1| +|a2|b2| +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_tables(s, i))) + .validate_complete(); + + assert_eq!(result.errors(), vec![]); + assert_eq!( + *result.value(), + json!({"a": ["a1", "a2"], "b": ["b1", "b2"]}) + ); + } + + #[test] + fn test_validate_table_vs_table_with_repeated_cell_min_bound() { + let schema_str = r#" 
+|c2|c2| +|-|-| +|`a:/.*/`|`b:/.*/`|{2,} + "#; + let input_str = r#" +|c2|c2| +|-|-| +|a1|b1| +|a2|b2| +|a3|b3| +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_tables(s, i))) + .validate_complete(); + + assert_eq!(result.errors(), vec![]); + assert_eq!( + *result.value(), + json!({"a": ["a1", "a2", "a3"], "b": ["b1", "b2", "b3"]}) + ); + } + + #[test] + fn test_validate_table_vs_table_repeated_then_literal() { + let schema_str = r#" +|c1|c2| +|-|-| +|`a:/.*/`|`b:/.*/`|{,2} +|lit1|lit2| +|lit3|lit4| + "#; + let input_str = r#" +|c1|c2| +|-|-| +|a1|b1| +|a2|b2| +|lit1|lit2| +|lit3|lit4| +"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_tables(s, i))) + .validate_complete(); + + assert_eq!(result.errors(), vec![]); + assert_eq!( + *result.value(), + json!({"a": ["a1", "a2"], "b": ["b1", "b2"]}) + ); + } } From a727ba674ce495ccdef16a78a919742a4eb980a2 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 00:34:58 -0500 Subject: [PATCH 18/33] improve table docs --- docs/src/content/docs/matchers/05-tables.mdx | 21 ++++- .../node_walker/validators/containers.rs | 36 +++++++- tests/misc.rs | 4 +- tests/tables.rs | 91 +++++++++++++++++++ 4 files changed, 145 insertions(+), 7 deletions(-) diff --git a/docs/src/content/docs/matchers/05-tables.mdx b/docs/src/content/docs/matchers/05-tables.mdx index 0203449..603befe 100644 --- a/docs/src/content/docs/matchers/05-tables.mdx +++ b/docs/src/content/docs/matchers/05-tables.mdx @@ -49,8 +49,6 @@ Individual cells can contain matchers: ## Repeated Rows - - Use `{min,max}` syntax on row patterns to match multiple rows: +### Mixing Literal and Repeated Rows + +You can combine literal rows with repeated rows in the same table: + + + +The validator will: +1. Match the literal "Header" row +2. 
Match 1-3 repeating rows with the pattern +3. Match the literal "Footer" row + ## Notes -- Rows return arrays when using repetition matchers +- Repeated rows return arrays for matched values - Headers and separator rows are required in both schema and input - Column count must match between schema and input +- Repeated row patterns must appear at the end of a row (after all cells) diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index 2ab1d54..770627e 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -70,6 +70,7 @@ impl ValidatorImpl for ContainerVsContainerValidator { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); + let need_to_restart_result = result.clone(); let mut schema_cursor = walker.schema_cursor().clone(); let mut input_cursor = walker.input_cursor().clone(); @@ -80,7 +81,9 @@ impl ValidatorImpl for ContainerVsContainerValidator { result, &schema_cursor, &input_cursor, - "expected textual container nodes" + "expected textual container nodes, got {:?} and {:?}", + schema_cursor.node().kind(), + input_cursor.node().kind() ); } @@ -173,8 +176,35 @@ impl ValidatorImpl for ContainerVsContainerValidator { (false, false) => { return result; } - (true, false) => todo!(), - (false, true) => todo!(), + (false, true) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! 
+ return need_to_restart_result; + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::InputHasChildSchemaDoesnt, + }, + )); + } + } + (true, false) => { + if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { + // okay, we'll just wait! + return need_to_restart_result; + } else { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::MalformedNodeStructure { + schema_index: schema_cursor.descendant_index(), + input_index: input_cursor.descendant_index(), + kind: MalformedStructureKind::SchemaHasChildInputDoesnt, + }, + )); + } + return result; + } } loop { diff --git a/tests/misc.rs b/tests/misc.rs index e734665..125454d 100644 --- a/tests/misc.rs +++ b/tests/misc.rs @@ -3,7 +3,9 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{MalformedStructureKind, SchemaViolationError, ValidationError}; +use mdvalidate::mdschema::validator::errors::{ + MalformedStructureKind, SchemaViolationError, ValidationError, +}; test_case!( node_heading_and_paragraph, diff --git a/tests/tables.rs b/tests/tables.rs index b7d217e..e9f8515 100644 --- a/tests/tables.rs +++ b/tests/tables.rs @@ -3,6 +3,10 @@ use serde_json::json; #[macro_use] mod helpers; +use mdvalidate::mdschema::validator::errors::{ + NodeContentMismatchKind, SchemaViolationError, ValidationError, +}; + test_case!( test_literal_tables, r#" @@ -23,3 +27,90 @@ test_case!( json!({"num": "2", "name": "Wolf"}), vec![] ); + +test_case!( + test_literal_repeated_literal_sandwich, + r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| `item:/\w+/` | `price:/\d+/` |{,3} +| Footer | 99 | +"#, + r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| Apple | 5 | +| Banana | 3 | +| Cherry | 7 | +| Footer | 99 | +"#, + 
json!({"item": ["Apple", "Banana", "Cherry"], "price": ["5", "3", "7"]}), + vec![] +); + +test_case!( + test_literal_repeated_literal_sandwich_with_footer, + r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| `item:/\w+/` | `price:/\d+/` |{,3} +| Footer | 99 | +"#, + r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| Apple | 5 | +| Banana | 3 | +| Cherry | 7 | +| Footer | 99 | +"#, + json!({"item": ["Apple", "Banana", "Cherry"], "price": ["5", "3", "7"]}), + vec![] +); + +test_case!( + test_literal_repeated_literal_sandwich_with_mismatch, + r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| `item:/\w+/` | `price:/\d+/` |{,2} +| Footer | 99 | +"#, + r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| Apple | 5 | +| Banana | not_a_number | +| Cherry | 7 | +| Footer | 99 | +"#, + json!({}), + // Should error on the second repeated row where price doesn't match the \d+ pattern + vec![ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: 11, + input_index: 18, + expected: "^\\d+".to_string(), + actual: "not_a_number".to_string(), + kind: NodeContentMismatchKind::Matcher, + } + )] +); From 3a28b77b2e830b22e97490231a90a31409580d85 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 14:43:17 -0500 Subject: [PATCH 19/33] finalize repeated tables --- AGENTS.md | 9 +- .../node_walker/validators/containers.rs | 9 +- .../node_walker/validators/matchers.rs | 6 + .../node_walker/validators/tables.rs | 187 ++++++++++++------ tests/tables.rs | 31 ++- 5 files changed, 174 insertions(+), 68 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index ebbc638..a36bc88 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,17 +1,18 @@ -# Agent Guidelines - ## ts_types imports - Always import `ts_types` via wildcard (`use crate::mdschema::validator::ts_types::*;`) so we do not list individual members. 
## Test imports -- Prefer `super::...` imports inside `#[cfg(test)]` modules (e.g., `super::test_utils::ValidatorTester` or `super::TextualVsTextualValidator`) so the tests stay concise. +- Prefer `super::...` imports inside `#[cfg(test)]` modules (e.g., `super::test_utils::ValidatorTester` or `super::TextualVsTextualValidator`) so the tests stay concise and structured. - Keep using wildcard `ts_types::*` in tests as well. ## Documentation - When a doc block lists both `schema_str` and `input_str`, use the exact wording: - `schema_str`: The full input document (so far). - `input_str`: The full schema document. -- For any doc line that mentions `got_eof`, use `/// * `got_eof`: Whether we have received the full input document.` verbatim. +- Every `///` doc line that mentions `got_eof` must read verbatim `/// * \`got_eof\`: Whether we have received the full input document.` + +## Node-walker validator docs +- Every file under `src/mdschema/validator/node_walker/validators` should start with a module doc comment and list each validator type defined in that file. ## Walker usage - Never add aliases such as `let schema_str = walker.schema_str()` or `let input_str = walker.input_str()`; call the walker methods directly. diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index 770627e..d4cfc88 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -4,6 +4,9 @@ //! - `TextualContainerVsTextualContainerValidator`: walks inline children in //! paragraphs/emphasis and validates them with matcher support and link-aware //! handling. +//! - `RepeatedMatcherParagraphVsParagraphValidator`: handles paragraphs that +//! contain a single repeating matcher, collecting matches across repeated +//! paragraphs before delegating to nested validation. 
use crate::mdschema::validator::matcher::matcher::MatcherKind; use crate::mdschema::validator::node_walker::helpers::check_repeating_matchers::check_repeating_matchers; use crate::mdschema::validator::node_walker::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children; @@ -311,8 +314,8 @@ impl ValidatorImpl for RepeatedMatcherParagraphVsParagraphValidator { let extras = matcher.extras(); - let n = extras.max_items().unwrap_or(usize::MAX); - for _ in 0..n { + let max_matches = extras.max_items_or(usize::MAX); + for _ in 0..max_matches { // compare the ENTIRE text of the paragraph let input_paragraph_text = get_node_text(&input_cursor.node(), walker.input_str()); @@ -331,7 +334,7 @@ impl ValidatorImpl for RepeatedMatcherParagraphVsParagraphValidator { } } - if matches.len() < extras.min_items().unwrap_or(0) { + if matches.len() < extras.min_items_or(0) { if waiting_at_end(got_eof, walker.input_str(), &input_cursor) { // That's ok. We may get them later. return result; diff --git a/src/mdschema/validator/node_walker/validators/matchers.rs b/src/mdschema/validator/node_walker/validators/matchers.rs index d2bd55e..614b56d 100644 --- a/src/mdschema/validator/node_walker/validators/matchers.rs +++ b/src/mdschema/validator/node_walker/validators/matchers.rs @@ -4,6 +4,12 @@ //! Types: //! - `MatcherVsTextValidator`: handles pattern matching and capture logic used //! when schema nodes embed matcher syntax inside textual content. +//! - `TextualVsMatcherValidator`: validates textual nodes when matchers appear +//! immediately after schema text and need to cooperate with surrounding +//! literals. +//! - `LiteralMatcherVsTextualValidator`: resolves matcher usage when literal +//! matchers span multiple textual nodes, computing matches across adjacent +//! literal fragments. 
use log::trace; use serde_json::json; use tree_sitter::TreeCursor; diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index ba2f366..b8cb396 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -3,6 +3,9 @@ //! Types: //! - `TableVsTableValidator`: validates table structure (rows, headers, cells) //! and delegates cell content checks to textual container validation. +//! - `RepeatedRowVsRowValidator`: processes schema rows followed by matcher +//! repeaters, keeping the schema stationary while validating multiple input +//! rows against a repeating matcher row. use crate::mdschema::validator::errors::{ MalformedStructureKind, NodeContentMismatchKind, SchemaViolationError, ValidationError, }; @@ -269,9 +272,6 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { ) } - // A version of the schema cursor where it is pointed at the first cell in the (repeating) row - let schema_cursor_at_first_cell = get_cursor_at_first_cell(&schema_cursor); - let max_bound = self.bounds.1.unwrap_or(usize::MAX); let corresponding_matchers = { @@ -296,56 +296,61 @@ impl ValidatorImpl for RepeatedRowVsRowValidator { 'row_iter: for _ in 0..max_bound { // Validate the entire row - { - let mut input_cursor_at_first_cell = get_cursor_at_first_cell(&input_cursor); - - let mut matcher_num = 0; - 'col_iter: for i in 0.. 
{ - let cell_str = - get_node_text(&input_cursor_at_first_cell.node(), walker.input_str()); - - match corresponding_matchers.get(i).unwrap() { - Some(matcher) => match matcher.match_str(cell_str) { - Some(captured_str) => { - all_matches - .get_mut(matcher_num) - .unwrap() // we pre filled it properly ahead of time - .push(captured_str.to_string()); - - matcher_num += 1; - } - None => { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::NodeContentMismatch { - schema_index: schema_cursor_at_first_cell - .descendant_index(), - input_index: input_cursor_at_first_cell.descendant_index(), - expected: matcher.pattern().to_string(), - actual: cell_str.into(), - kind: NodeContentMismatchKind::Matcher, - }, - )); - - return result; - } - }, + let mut input_cursor_at_first_cell = get_cursor_at_first_cell(&input_cursor); + let mut schema_cursor_at_first_cell = get_cursor_at_first_cell(&schema_cursor); + + let mut matcher_num = 0; + 'col_iter: for i in 0.. { + let cell_str = + get_node_text(&input_cursor_at_first_cell.node(), walker.input_str()).trim(); + + match corresponding_matchers.get(i).unwrap() { + Some(matcher) => match matcher.match_str(cell_str) { + Some(captured_str) => { + all_matches + .get_mut(matcher_num) + .unwrap() // we pre filled it properly ahead of time + .push(captured_str.to_string()); + + matcher_num += 1; + } None => { - // Validate the cell as a normal container. 
- let cell_result = ContainerVsContainerValidator::default().validate( - &walker.with_cursors(&schema_cursor, &input_cursor), - got_eof, - ); - result.join_data(cell_result.data()); - if cell_result.has_errors() { - result.join_errors(cell_result.errors()); - return result; - } + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: schema_cursor_at_first_cell.descendant_index(), + input_index: input_cursor_at_first_cell.descendant_index(), + expected: matcher.pattern().to_string(), + actual: cell_str.into(), + kind: NodeContentMismatchKind::Matcher, + }, + )); + + return result; + } + }, + None => { + // Validate the cell as a normal container. + let cell_result = ContainerVsContainerValidator::default().validate( + &walker.with_cursors( + &schema_cursor_at_first_cell, + &input_cursor_at_first_cell, + ), + got_eof, + ); + result.join_data(cell_result.data()); + if cell_result.has_errors() { + result.join_errors(cell_result.errors()); + return result; } } + } - if !input_cursor_at_first_cell.goto_next_sibling() { + if input_cursor_at_first_cell.goto_next_sibling() { + if !schema_cursor_at_first_cell.goto_next_sibling() { break 'col_iter; } + } else { + break 'col_iter; } } @@ -397,14 +402,47 @@ fn get_cell_indexes_that_have_simple_matcher( let mut indexes = Vec::new(); loop { - // For it to be a "simple" matcher with nothing else, it must ONLY have - // a single child, which is a code node. 
- let single_child_that_is_code = schema_cursor.node().child_count() == 1 - && is_inline_code_node(&schema_cursor.node().child(0).unwrap()); - - if single_child_that_is_code { - if let Ok(matcher) = Matcher::try_from_schema_cursor(&schema_cursor, schema_str) { - indexes.push(Some(matcher)); + let mut code_child_idx = None; + let mut is_simple = true; + + for idx in 0..schema_cursor.node().child_count() { + let child = schema_cursor.node().child(idx).unwrap(); + + if is_inline_code_node(&child) { + if code_child_idx.is_some() { + is_simple = false; + break; + } + code_child_idx = Some(idx); + } else if child.kind() == "text" { + let text = get_node_text(&child, schema_str); + if !text.chars().all(|c| c.is_whitespace()) { + is_simple = false; + break; + } + } else { + is_simple = false; + break; + } + } + + if is_simple { + if let Some(code_idx) = code_child_idx { + let mut matcher_cursor = schema_cursor.clone(); + if matcher_cursor.goto_first_child() { + for _ in 0..code_idx { + matcher_cursor.goto_next_sibling(); + } + if let Ok(matcher) = + Matcher::try_from_schema_cursor(&matcher_cursor, schema_str) + { + indexes.push(Some(matcher)); + } else { + indexes.push(None); + } + } else { + indexes.push(None); + } } else { indexes.push(None); } @@ -878,7 +916,7 @@ mod tests { result.errors(), vec![ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { - schema_index: 11, + schema_index: 14, input_index: 18, expected: "^xx".to_string(), actual: "b2".to_string(), @@ -973,4 +1011,41 @@ mod tests { json!({"a": ["a1", "a2"], "b": ["b1", "b2"]}) ); } + + #[test] + fn test_validate_table_vs_table_literal_repeated_literal_sandwich_with_footer() { + let schema_str = r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| `item:/\w+/` | `price:/\d+/` |{,3} +| Footer | 99 | +"#; + let input_str = r#" +# Shopping List + +| Item | Price | +|:-----|:------| +| Header | 10 | +| Apple | 5 | +| Banana | 3 | +| Cherry | 7 | +| Footer | 99 | 
+"#; + + let result = ValidatorTester::::from_strs(schema_str, input_str) + .walk() + .goto_first_child_then_unwrap() + .goto_next_sibling_then_unwrap() + .peek_nodes(|(s, i)| assert!(both_are_tables(s, i))) + .validate_complete(); + + assert_eq!(result.errors(), vec![]); + assert_eq!( + *result.value(), + json!({"item": ["Apple", "Banana", "Cherry"], "price": ["5", "3", "7"]}) + ); + } } diff --git a/tests/tables.rs b/tests/tables.rs index e9f8515..ddff344 100644 --- a/tests/tables.rs +++ b/tests/tables.rs @@ -45,9 +45,9 @@ test_case!( | Item | Price | |:-----|:------| | Header | 10 | -| Apple | 5 | -| Banana | 3 | -| Cherry | 7 | +| Apple | 5 | +| Banana | 3 | +| Cherry | 7 | | Footer | 99 | "#, json!({"item": ["Apple", "Banana", "Cherry"], "price": ["5", "3", "7"]}), @@ -106,11 +106,32 @@ test_case!( // Should error on the second repeated row where price doesn't match the \d+ pattern vec![ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { - schema_index: 11, - input_index: 18, + schema_index: 25, + input_index: 27, expected: "^\\d+".to_string(), actual: "not_a_number".to_string(), kind: NodeContentMismatchKind::Matcher, } )] ); + +test_case!( + test_repeated_row_sandwich, + r#" +|c1|c2| +|-|-| +|`a:/.*/`|`b:/.*/`|{,2} +|lit1|lit2| +|lit3|lit4| +"#, + r#" +|c1|c2| +|-|-| +|a1|b1| +|a2|b2| +|lit1|lit2| +|lit3|lit4| +"#, + json!({"a": ["a1", "a2"], "b": ["b1", "b2"]}), + vec![] +); From 23981775877917459c3bcbf8e495a324a7318711 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 14:43:27 -0500 Subject: [PATCH 20/33] fix table doc --- docs/src/content/docs/matchers/05-tables.mdx | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/src/content/docs/matchers/05-tables.mdx b/docs/src/content/docs/matchers/05-tables.mdx index 603befe..75455ea 100644 --- a/docs/src/content/docs/matchers/05-tables.mdx +++ b/docs/src/content/docs/matchers/05-tables.mdx @@ -5,7 +5,6 @@ order: 5 --- import SchemaAndInput from 
"../../../components/SchemaAndInput.astro"; -import TODO from "../../../components/TODO.astro"; Tables can be validated for structure, headers, and cell content using matchers. From 6692fc1d83846a66b6d47ec5f7c22be2368f0d9f Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 14:45:59 -0500 Subject: [PATCH 21/33] improve list formatting in docs --- src/mdschema/validator/matcher/matcher.rs | 4 +--- .../validator/node_walker/helpers/compare_node_kinds.rs | 8 ++++---- .../node_walker/helpers/compare_text_contents.rs | 8 ++++---- .../node_walker/helpers/node_children_lengths.rs | 4 ++-- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/mdschema/validator/matcher/matcher.rs b/src/mdschema/validator/matcher/matcher.rs index 3dc05ed..67f2121 100644 --- a/src/mdschema/validator/matcher/matcher.rs +++ b/src/mdschema/validator/matcher/matcher.rs @@ -171,9 +171,7 @@ impl Matcher { /// /// # Arguments /// * `pattern` - The pattern string within the matcher codeblock. - /// * `after_str` - Optional extras string following the pattern. This must - /// have a sequence of valid matcher extras, only followed by additional - /// text if there is a space in between. + /// * `after_str` - Optional extras string following the pattern. This must have a sequence of valid matcher extras, only followed by additional text if there is a space in between. 
pub fn try_from_pattern_and_suffix_str( pattern_str: &str, after_str: Option<&str>, diff --git a/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs b/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs index 58cc0b4..4b58388 100644 --- a/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs +++ b/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs @@ -13,10 +13,10 @@ use crate::mdschema::validator::ts_utils::{ /// - Other nodes: checks exact kind match /// /// # Arguments -/// - `schema_cursor`: Cursor at schema node -/// - `input_cursor`: Cursor at input node -/// - `schema_str`: The schema markdown string -/// - `input_str`: The input markdown string +/// * `schema_cursor`: Cursor at schema node +/// * `input_cursor`: Cursor at input node +/// * `schema_str`: The schema markdown string +/// * `input_str`: The input markdown string pub fn compare_node_kinds( schema_cursor: &TreeCursor, input_cursor: &TreeCursor, diff --git a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs b/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs index 8d9d4ea..9f1e171 100644 --- a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs +++ b/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs @@ -15,10 +15,10 @@ use crate::mdschema::validator::ts_utils::get_node_text; /// # Arguments /// * `schema_str`: The full input document (so far). /// * `input_str`: The full schema document. 
-/// - `schema_cursor`: Cursor at schema text node -/// - `input_cursor`: Cursor at input text node -/// - `is_partial_match`: Whether we're doing a partial match (not at EOF) -/// - `strip_extras`: Whether to strip extras (like `!`) from schema text +/// * `schema_cursor`: Cursor at schema text node +/// * `input_cursor`: Cursor at input text node +/// * `is_partial_match`: Whether we're doing a partial match (not at EOF) +/// * `strip_extras`: Whether to strip extras (like `!`) from schema text pub fn compare_text_contents( schema_str: &str, input_str: &str, diff --git a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs index 7eae84a..6d3fe54 100644 --- a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs +++ b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs @@ -11,8 +11,8 @@ use crate::mdschema::validator::errors::{ /// - Not at EOF: input has more children than schema /// /// # Arguments -/// - `schema_cursor`: Cursor at schema node -/// - `input_cursor`: Cursor at input node +/// * `schema_cursor`: Cursor at schema node +/// * `input_cursor`: Cursor at input node /// * `got_eof`: Whether we have received the full input document. 
pub fn compare_node_children_lengths( schema_cursor: &TreeCursor, From 5f1c99909a5d35dc6d7d41da2e8b06950ada8390 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 15:01:57 -0500 Subject: [PATCH 22/33] add schema and input --- examples/cli/input.md | 49 +++++++++++++++++++++++++++------------- examples/cli/schema.md | 51 +++++++++++++++++++++++++++++------------- 2 files changed, 69 insertions(+), 31 deletions(-) diff --git a/examples/cli/input.md b/examples/cli/input.md index 79113d3..9750fb6 100644 --- a/examples/cli/input.md +++ b/examples/cli/input.md @@ -1,23 +1,40 @@ -# Has 1 to 3 paragraphs +Imported with [Obsidian Markdown Importer](https://github.com/404Wolf/obsidian-contact-importer) -test1 +--- -test2 +![Image](6bf36ff64dfc6d8a.jpeg) -test3 +## Phones -|c2|c2| -|-|-| -|`a:/.*/`|`b:/.*/`|{,} +| Type | Number | +| :----- | :----------------- | +| Backup | `c!(917) 246-7875` | +| Misc | `c!(929) 265-7180` | ---- +## Emails + +| Type | Address | +| :------------- | :----------------------------- | +| Misc | `c!wolf@404wolf.com` | +| Misc | `c!wsm32@case.edu` | + +## Socials + +| Type | Handle | +|:--------------|:--------------| -Output is +## Links + +| Type | URL | +| :------- | :---------------------- | +| HomePage | `g!https://404wolf.com` | + +## Other + +| Type | Value | +|:--------------|:--------------| +| Birthday | `g!xxxxxxxx` | + +--- -```json -{ -"num": "1", -"test": ["test1", "test2", "test3"] -"a": ["a1", "a2"], "b": ["b1", "b2"] -} -``` +This is me. 
diff --git a/examples/cli/schema.md b/examples/cli/schema.md index 9f9e039..255ac71 100644 --- a/examples/cli/schema.md +++ b/examples/cli/schema.md @@ -1,20 +1,41 @@ -# Has `num:/d/` to 3 paragraphs +Imported with [Obsidian Markdown Importer](https://github.com/404Wolf/obsidian-contact-importer) -`test:/test\d/`{1,3} +--- -|c2|c2| -|-|-| -|a1|b1| -|a2|b2| +![{image_link:/.*/}]({image_filename:/.*/}) ---- +## Phones + +| Type | Number | +| :----- | :----------------- | +| Backup | `c!(917) 246-7875` | +| Misc | `c!(929) 265-7180` | + +## Emails + +| Type | Address | +| :------------------ | :------------- | +| `contact_type:/.*/` | `contact:/.*/` |{,} + + +## Socials -Output is +| Type | Handle | +|:-------------------|:--------------| +| `social_type:/.*/` | `social:/.*/` |{,} + +## Links + +| Type | URL | +| :--------------- | :---------------------- | +| `link_type:/.*/` | `link_url:/.*/` |{,} + +## Other + +| Type | Value | +|:------------------|:--------------| +| `other_type:/.*/` | `other_url:/.*/` |{,} + +--- -```json -{ -"num": "1", -"test": ["test1", "test2", "test3"] -"a": ["a1", "a2"], "b": ["b1", "b2"] -} -``` +`comments`{,} From bb9ab850c7e0cad7173c2a36f958e57d0c397dbf Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 15:21:17 -0500 Subject: [PATCH 23/33] whitespace insensitive for table cells --- examples/cli/input.md | 28 --------------- examples/cli/schema.md | 36 ++----------------- .../node_walker/validators/tables.rs | 1 - src/mdschema/validator/ts_utils.rs | 8 ++++- tests/tables.rs | 22 ++++++------ 5 files changed, 21 insertions(+), 74 deletions(-) diff --git a/examples/cli/input.md b/examples/cli/input.md index 9750fb6..37c255b 100644 --- a/examples/cli/input.md +++ b/examples/cli/input.md @@ -10,31 +10,3 @@ Imported with [Obsidian Markdown Importer](https://github.com/404Wolf/obsidian-c | :----- | :----------------- | | Backup | `c!(917) 246-7875` | | Misc | `c!(929) 265-7180` | - -## Emails - -| Type | Address | -| 
:------------- | :----------------------------- | -| Misc | `c!wolf@404wolf.com` | -| Misc | `c!wsm32@case.edu` | - -## Socials - -| Type | Handle | -|:--------------|:--------------| - -## Links - -| Type | URL | -| :------- | :---------------------- | -| HomePage | `g!https://404wolf.com` | - -## Other - -| Type | Value | -|:--------------|:--------------| -| Birthday | `g!xxxxxxxx` | - ---- - -This is me. diff --git a/examples/cli/schema.md b/examples/cli/schema.md index 255ac71..e4ae77e 100644 --- a/examples/cli/schema.md +++ b/examples/cli/schema.md @@ -6,36 +6,6 @@ Imported with [Obsidian Markdown Importer](https://github.com/404Wolf/obsidian-c ## Phones -| Type | Number | -| :----- | :----------------- | -| Backup | `c!(917) 246-7875` | -| Misc | `c!(929) 265-7180` | - -## Emails - -| Type | Address | -| :------------------ | :------------- | -| `contact_type:/.*/` | `contact:/.*/` |{,} - - -## Socials - -| Type | Handle | -|:-------------------|:--------------| -| `social_type:/.*/` | `social:/.*/` |{,} - -## Links - -| Type | URL | -| :--------------- | :---------------------- | -| `link_type:/.*/` | `link_url:/.*/` |{,} - -## Other - -| Type | Value | -|:------------------|:--------------| -| `other_type:/.*/` | `other_url:/.*/` |{,} - ---- - -`comments`{,} +| Type | Number | +| :---------------- | :------------------ | +| `phone_type:/.*/` | `phone_number:/.*/` |{,} diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index b8cb396..bcc9238 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -88,7 +88,6 @@ impl ValidatorImpl for TableVsTableValidator { 'row_iter: loop { // First check if we are dealing with a special case -- repeated rows! 
- trace_cursors!(schema_cursor, input_cursor); if both_are_table_data_rows(&schema_cursor.node(), &input_cursor.node()) && let Some(bounds) = try_get_repeated_row_bounds(&schema_cursor, walker.schema_str()) diff --git a/src/mdschema/validator/ts_utils.rs b/src/mdschema/validator/ts_utils.rs index a120aa6..a0bf5b1 100644 --- a/src/mdschema/validator/ts_utils.rs +++ b/src/mdschema/validator/ts_utils.rs @@ -14,7 +14,13 @@ use std::sync::LazyLock; /// Extract text from a tree-sitter node using the provided source string. pub fn get_node_text<'a, S: Into<&'a str>>(node: &Node, src: S) -> &'a str { let src_ref = src.into(); - node.utf8_text(src_ref.as_bytes()).unwrap() + let node_str = node.utf8_text(src_ref.as_bytes()).unwrap(); + + if is_table_cell_node(&node) || node.parent().is_some_and(|n| is_table_cell_node(&n)) { + node_str.trim_start().trim_end() + } else { + node_str + } } /// Ordered lists use numbers followed by period . or right paren ) diff --git a/tests/tables.rs b/tests/tables.rs index ddff344..b927f16 100644 --- a/tests/tables.rs +++ b/tests/tables.rs @@ -33,22 +33,22 @@ test_case!( r#" # Shopping List -| Item | Price | -|:-----|:------| -| Header | 10 | +| Item | Price | +|:-----|:----------------------| +| Header | 10 | | `item:/\w+/` | `price:/\d+/` |{,3} -| Footer | 99 | +| Footer | 99 | "#, r#" # Shopping List -| Item | Price | -|:-----|:------| -| Header | 10 | -| Apple | 5 | -| Banana | 3 | -| Cherry | 7 | -| Footer | 99 | +| Item | Price | +|:-------|:------| +| Header | 10 | +| Apple | 5 | +| Banana | 3 | +| Cherry | 7 | +| Footer | 99 | "#, json!({"item": ["Apple", "Banana", "Cherry"], "price": ["5", "3", "7"]}), vec![] From 31800e169106000fd9076f9dcbeef31f42c7c144 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 16:39:49 -0500 Subject: [PATCH 24/33] fix trimming for matchers in tables --- .../validator/node_walker/validators/matchers.rs | 16 ++++++++++++---- .../validator/node_walker/validators/tables.rs | 2 +- 2 files 
changed, 13 insertions(+), 5 deletions(-) diff --git a/src/mdschema/validator/node_walker/validators/matchers.rs b/src/mdschema/validator/node_walker/validators/matchers.rs index 614b56d..6e200e7 100644 --- a/src/mdschema/validator/node_walker/validators/matchers.rs +++ b/src/mdschema/validator/node_walker/validators/matchers.rs @@ -346,11 +346,19 @@ impl ValidatorImpl for MatcherVsTextValidator { }; // Seek forward from the current input byte offset by the length of the suffix - let input_suffix_len = input_cursor.node().byte_range().end - input_byte_offset; - - // Check if input_suffix is shorter than schema_suffix - let input_suffix = + let input_suffix_raw = &walker.input_str()[input_byte_offset..input_cursor.node().byte_range().end]; + + // Trim the input suffix if we're in a table cell context, to match how schema_suffix is obtained + let input_suffix = if is_table_cell_node(&input_cursor.node()) + || input_cursor.node().parent().is_some_and(|n| is_table_cell_node(&n)) { + input_suffix_raw.trim() + } else { + input_suffix_raw + }; + + // Calculate the actual length after potential trimming + let input_suffix_len = input_suffix.len(); if input_suffix_len < schema_suffix.len() { if got_eof { diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index bcc9238..06cb0d3 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -18,7 +18,7 @@ use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorIm use crate::mdschema::validator::ts_types::*; use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; use crate::mdschema::validator::validator_walker::ValidatorWalker; -use crate::{invariant_violation, trace_cursors}; +use crate::invariant_violation; use log::trace; use tree_sitter::TreeCursor; From c9ecc4bbc3f5c0a3bebdbe9357611466ac592fd4 Mon Sep 17 00:00:00 2001 From: Wolf 
Mermelstein Date: Sat, 10 Jan 2026 16:40:27 -0500 Subject: [PATCH 25/33] make cargo check happy --- .../validator/node_walker/helpers/node_children_lengths.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs index 6d3fe54..3b36dd8 100644 --- a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs +++ b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs @@ -14,6 +14,7 @@ use crate::mdschema::validator::errors::{ /// * `schema_cursor`: Cursor at schema node /// * `input_cursor`: Cursor at input node /// * `got_eof`: Whether we have received the full input document. +#[allow(dead_code)] // TODO: use this instead of throwing children descendant mismatches pub fn compare_node_children_lengths( schema_cursor: &TreeCursor, input_cursor: &TreeCursor, From 45761e648f89cb502cfafed18a9058c1a611dcba Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 16:44:37 -0500 Subject: [PATCH 26/33] run clippy --- src/cmd.rs | 2 +- src/env.rs | 6 +---- src/main.rs | 10 ++++----- src/mdschema/validator/errors.rs | 2 +- src/mdschema/validator/matcher/matcher.rs | 8 +++---- .../validator/matcher/matcher_extras.rs | 7 +++--- .../node_walker/helpers/compare_node_kinds.rs | 6 ++--- .../helpers/compare_text_contents.rs | 2 +- .../helpers/expected_input_nodes.rs | 22 ++++++++----------- .../helpers/node_children_lengths.rs | 2 +- src/mdschema/validator/node_walker/mod.rs | 2 +- .../validator/node_walker/node_walker.rs | 2 +- .../validator/node_walker/validators/code.rs | 4 +--- .../node_walker/validators/containers.rs | 18 ++++++--------- .../validator/node_walker/validators/links.rs | 5 ++--- .../validator/node_walker/validators/lists.rs | 12 +++++----- .../node_walker/validators/matchers.rs | 8 +++---- .../validator/node_walker/validators/mod.rs | 2 +- .../validator/node_walker/validators/nodes.rs | 22 
+++++++++---------- .../node_walker/validators/tables.rs | 2 +- .../node_walker/validators/textual.rs | 8 +++---- src/mdschema/validator/ts_utils.rs | 11 +++++----- src/mdschema/validator/validator.rs | 10 ++++----- 23 files changed, 76 insertions(+), 97 deletions(-) diff --git a/src/cmd.rs b/src/cmd.rs index c453bce..b399af4 100644 --- a/src/cmd.rs +++ b/src/cmd.rs @@ -372,7 +372,7 @@ This is a test"#; "Expected exactly one error but found {:?}", errors ); - assert!(matches.is_null() || matches.as_object().map_or(true, |obj| obj.is_empty())); + assert!(matches.is_null() || matches.as_object().is_none_or(|obj| obj.is_empty())); } } diff --git a/src/env.rs b/src/env.rs index e32ed40..eb1dcfc 100644 --- a/src/env.rs +++ b/src/env.rs @@ -9,6 +9,7 @@ use serde::Deserialize; /// /// All fields are optional. #[derive(Debug, Deserialize, Clone)] +#[derive(Default)] pub struct EnvConfig { /// Enable debug mode for error output. /// @@ -20,11 +21,6 @@ pub struct EnvConfig { pub dev_debug: bool, } -impl Default for EnvConfig { - fn default() -> Self { - Self { dev_debug: false } - } -} impl EnvConfig { /// Load configuration from environment variables. 
diff --git a/src/main.rs b/src/main.rs index ac6f170..49a58a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -51,20 +51,18 @@ fn main() -> Result<(), Box> { let env_config = EnvConfig::load(); let schema_src = PathOrStdio::from(args.schema); - let schema_src = schema_src.reader().or_else(|e| { - Err(format!( + let schema_src = schema_src.reader().map_err(|e| format!( "Failed to open schema file '{}': {}", schema_src.filepath(), e - )) - })?; + ))?; let mut schema_str = String::new(); BufReader::new(schema_src).read_to_string(&mut schema_str)?; let input = PathOrStdio::from(args.input); let mut input_reader = input.reader()?; - let mut output_writer: &mut Option<&mut Box> = match args.output { + let output_writer: &mut Option<&mut Box> = match args.output { Some(ref output_path) => { let output_pos = PathOrStdio::from(output_path.clone()); &mut Some(&mut output_pos.writer()?) @@ -75,7 +73,7 @@ fn main() -> Result<(), Box> { match process_stdio( &schema_str, &mut input_reader, - &mut output_writer, + output_writer, input.filepath(), args.fast_fail, args.quiet, diff --git a/src/mdschema/validator/errors.rs b/src/mdschema/validator/errors.rs index e814216..d8750af 100644 --- a/src/mdschema/validator/errors.rs +++ b/src/mdschema/validator/errors.rs @@ -19,7 +19,7 @@ macro_rules! 
trace_cursors { ($schema_cursor:expr, $input_cursor:expr) => {{ println!( "{}", - crate::mdschema::validator::node_walker::utils::pretty_print_cursor_pair( + $crate::mdschema::validator::node_walker::utils::pretty_print_cursor_pair( &$schema_cursor, &$input_cursor, ) diff --git a/src/mdschema/validator/matcher/matcher.rs b/src/mdschema/validator/matcher/matcher.rs index 67f2121..68f4e7e 100644 --- a/src/mdschema/validator/matcher/matcher.rs +++ b/src/mdschema/validator/matcher/matcher.rs @@ -188,7 +188,7 @@ impl Matcher { } let (id, pattern) = match captures { - Some(caps) => extract_id_and_pattern(&caps, &pattern_str)?, + Some(caps) => extract_id_and_pattern(&caps, pattern_str)?, None => { return Err(MatcherError::MatcherInteriorRegexInvalid(format!( "Expected format: 'id:/regex/' or 'id', got {}", @@ -233,7 +233,7 @@ impl Matcher { let pattern_str = get_node_text(&schema_cursor.node(), schema_str); let next_node = get_next_node(schema_cursor); let extras_str = next_node - .filter(|n| is_text_node(&n)) // don't bother if not text; extras must be in text + .filter(|n| is_text_node(n)) // don't bother if not text; extras must be in text .map(|n| get_node_text(&n, schema_str)) .and_then(|n| partition_at_special_chars(n).map(|(extras, _)| extras)); @@ -258,7 +258,7 @@ impl Matcher { /// The ID of the matcher. This is the key in the final JSON. pub fn id(&self) -> Option<&str> { - self.id.as_ref().map(|s| s.as_str()) + self.id.as_deref() } /// Get a reference to the extras @@ -500,7 +500,7 @@ mod tests { // rather than there being wrong ones. We probably want to change this // eventually though. 
let result = Matcher::try_from_pattern_and_suffix_str("`name:/test/`", Some("bullshit")); - assert!(!result.is_err()); // TODO: for now + assert!(result.is_ok()); // TODO: for now } #[test] diff --git a/src/mdschema/validator/matcher/matcher_extras.rs b/src/mdschema/validator/matcher/matcher_extras.rs index 8dc5ecf..b29c3c6 100644 --- a/src/mdschema/validator/matcher/matcher_extras.rs +++ b/src/mdschema/validator/matcher/matcher_extras.rs @@ -47,7 +47,7 @@ pub fn has_literal_within_extras(text: &str) -> bool { && text.len() != 1 && !{ match partition_at_special_chars(&text[1..]) { - Some((extras, _after)) => extras == "", + Some((extras, _after)) => extras.is_empty(), None => false, } } @@ -128,11 +128,10 @@ impl MatcherExtras { /// * `text` - Optional text following the matcher code block pub fn try_new(text: Option<&str>) -> Result { // Check if text matches the pattern, if text is provided - if let Some(text) = text { - if !MATCHERS_EXTRA_PATTERN.is_match(text) { + if let Some(text) = text + && !MATCHERS_EXTRA_PATTERN.is_match(text) { return Err(MatcherExtrasError::MatcherExtrasInvalid); } - } Ok(match text { Some(text) => { diff --git a/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs b/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs index 4b58388..240e524 100644 --- a/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs +++ b/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs @@ -63,11 +63,11 @@ pub fn compare_node_kinds( } if schema_cursor.node().kind() == "atx_heading" && input_cursor.node().kind() == "atx_heading" { - let schema_heading_kind = match get_heading_kind(&schema_cursor) { + let schema_heading_kind = match get_heading_kind(schema_cursor) { Ok(kind) => kind, Err(error) => return Some(error), }; - let input_heading_kind = match get_heading_kind(&input_cursor) { + let input_heading_kind = match get_heading_kind(input_cursor) { Ok(kind) => kind, Err(error) => return Some(error), }; @@ -123,7 
+123,7 @@ macro_rules! compare_node_kinds_check { $input_str:expr, $result:expr ) => { - if let Some(error) = crate::mdschema::validator::node_walker::helpers::compare_node_kinds::compare_node_kinds( + if let Some(error) = $crate::mdschema::validator::node_walker::helpers::compare_node_kinds::compare_node_kinds( &$schema_cursor, &$input_cursor, $schema_str, diff --git a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs b/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs index 9f1e171..7928b3b 100644 --- a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs +++ b/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs @@ -149,7 +149,7 @@ macro_rules! compare_text_contents_check { $result:expr ) => { { - let text_result = crate::mdschema::validator::node_walker::helpers::compare_text_contents::compare_text_contents( + let text_result = $crate::mdschema::validator::node_walker::helpers::compare_text_contents::compare_text_contents( $schema_str, $input_str, &$schema_cursor, diff --git a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs b/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs index 7d22473..e281ac1 100644 --- a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs +++ b/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs @@ -16,7 +16,7 @@ use crate::mdschema::validator::{ /// # Algorithm /// /// ```ignore -//// we at matcher? +/// we at matcher? /// ├── no /// │ └── next is matcher? /// │ ├── no -> 0 @@ -131,19 +131,15 @@ fn has_extra_text(schema_cursor: &TreeCursor, schema_str: &str) -> Result { let had_next_matcher = move_cursor_to_next_matcher(&mut lookahead_cursor, schema_str)?; - let has_text_after_matcher = text_after_matcher(schema_cursor, schema_str)? 
!= ""; + let has_text_after_matcher = !(text_after_matcher(schema_cursor, schema_str)?).is_empty(); if has_text_after_matcher { return Ok(true); } if is_literal { - let next_is_literal = match at_coalescing_matcher(&lookahead_cursor, schema_str) - .unwrap_or(Some(false)) - { - Some(next_matcher_is_literal) => next_matcher_is_literal, - None => false, - }; + let next_is_literal = at_coalescing_matcher(&lookahead_cursor, schema_str) + .unwrap_or(Some(false)).unwrap_or_default(); if !had_next_matcher { return Ok(false); }; @@ -173,7 +169,7 @@ fn text_after_matcher<'a>( ); } - match get_next_node(&schema_cursor) { + match get_next_node(schema_cursor) { Some(next_node) => { if !is_text_node(&next_node) { return Ok(""); @@ -204,7 +200,7 @@ fn extras_after_matcher<'a>( ); } - match get_next_node(&schema_cursor) { + match get_next_node(schema_cursor) { Some(next_node) => { let next_node_str = get_node_text(&next_node, schema_str); @@ -266,7 +262,7 @@ fn move_cursor_to_next_matcher( schema_cursor: &mut TreeCursor, schema_str: &str, ) -> Result { - let extras_after_matcher = extras_after_matcher(schema_cursor, schema_str)? 
!= ""; + let extras_after_matcher = !(extras_after_matcher(schema_cursor, schema_str)?).is_empty(); // If there was extras after the matcher, that means we should skip to the // next next node @@ -310,7 +306,7 @@ mod tests { has_extra_text(&schema_cursor, schema_str).unwrap() } - fn get_text_after_matcher<'a>(schema_str: &'a str) -> &'a str { + fn get_text_after_matcher(schema_str: &str) -> &str { let schema_tree = parse_markdown(schema_str).unwrap(); let mut schema_cursor = schema_tree.walk(); schema_cursor.goto_first_child(); @@ -318,7 +314,7 @@ mod tests { text_after_matcher(&schema_cursor, schema_str).unwrap() } - fn get_extras_after_matcher<'a>(schema_str: &'a str) -> &'a str { + fn get_extras_after_matcher(schema_str: &str) -> &str { let schema_tree = parse_markdown(schema_str).unwrap(); let mut schema_cursor = schema_tree.walk(); schema_cursor.goto_first_child(); diff --git a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs index 3b36dd8..43cd645 100644 --- a/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs +++ b/src/mdschema/validator/node_walker/helpers/node_children_lengths.rs @@ -76,7 +76,7 @@ macro_rules! 
compare_node_children_lengths_check { $got_eof:expr, $result:expr ) => { - if let Some(error) = crate::mdschema::validator::node_walker::helpers::node_children_lengths::compare_node_children_lengths( + if let Some(error) = $crate::mdschema::validator::node_walker::helpers::node_children_lengths::compare_node_children_lengths( &$schema_cursor, &$input_cursor, $got_eof, diff --git a/src/mdschema/validator/node_walker/mod.rs b/src/mdschema/validator/node_walker/mod.rs index 0379ba9..bede698 100644 --- a/src/mdschema/validator/node_walker/mod.rs +++ b/src/mdschema/validator/node_walker/mod.rs @@ -2,7 +2,7 @@ pub mod node_walker; pub use validation_result::ValidationResult; -pub(self) mod helpers; + mod helpers; mod validation_result; pub(super) mod validators; diff --git a/src/mdschema/validator/node_walker/node_walker.rs b/src/mdschema/validator/node_walker/node_walker.rs index 71ff978..f6f693a 100644 --- a/src/mdschema/validator/node_walker/node_walker.rs +++ b/src/mdschema/validator/node_walker/node_walker.rs @@ -57,7 +57,7 @@ impl<'a, S: ValidatorState> NodeWalker<'a, S> { .farthest_reached_pos() .walk_cursors_to_pos(schema_cursor, input_cursor); - let validation_result = NodeVsNodeValidator::default().validate(&walker, got_eof); + let validation_result = NodeVsNodeValidator.validate(&walker, got_eof); self.state.push_validation_result(validation_result.clone()); diff --git a/src/mdschema/validator/node_walker/validators/code.rs b/src/mdschema/validator/node_walker/validators/code.rs index 48d50d6..ffbf324 100644 --- a/src/mdschema/validator/node_walker/validators/code.rs +++ b/src/mdschema/validator/node_walker/validators/code.rs @@ -159,8 +159,7 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { Some((input_lang_str, input_lang_descendant_index)), Some((schema_lang_str, schema_lang_descendant_index)), ) = (schema_lang, input_lang) - { - if input_lang_str != schema_lang_str { + && input_lang_str != schema_lang_str { 
result.add_error(ValidationError::SchemaViolation( SchemaViolationError::NodeContentMismatch { schema_index: *schema_lang_descendant_index, @@ -171,7 +170,6 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { }, )); } - } } } diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validator/node_walker/validators/containers.rs index d4cfc88..39b7c65 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validator/node_walker/validators/containers.rs @@ -99,12 +99,12 @@ impl ValidatorImpl for ContainerVsContainerValidator { ); if is_repeated_matcher_paragraph(&schema_cursor, walker.schema_str()) { - return RepeatedMatcherParagraphVsParagraphValidator::default() + return RepeatedMatcherParagraphVsParagraphValidator .validate(walker, got_eof); } - if !self.allow_repeating { - if let Some(repeating_matcher_index) = + if !self.allow_repeating + && let Some(repeating_matcher_index) = check_repeating_matchers(&schema_cursor, walker.schema_str()) { result.add_error(ValidationError::SchemaError( @@ -114,7 +114,6 @@ impl ValidatorImpl for ContainerVsContainerValidator { )); return result; } - } match count_non_literal_matchers_in_children(&schema_cursor, walker.schema_str()) { Ok(non_literal_matchers_in_children) @@ -214,10 +213,10 @@ impl ValidatorImpl for ContainerVsContainerValidator { let pair_result = if both_are_link_nodes(&schema_cursor.node(), &input_cursor.node()) || both_are_image_nodes(&schema_cursor.node(), &input_cursor.node()) { - LinkVsLinkValidator::default() + LinkVsLinkValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof) } else { - let new_result = TextualVsTextualValidator::default() + let new_result = TextualVsTextualValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); new_result.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); new_result @@ -274,7 +273,7 @@ pub(super) struct 
RepeatedMatcherParagraphVsParagraphValidator; impl ValidatorImpl for RepeatedMatcherParagraphVsParagraphValidator { fn validate_impl(&self, walker: &ValidatorWalker, got_eof: bool) -> ValidationResult { let mut result = - ValidationResult::from_cursors(walker.schema_cursor(), &walker.input_cursor()); + ValidationResult::from_cursors(walker.schema_cursor(), walker.input_cursor()); let mut schema_cursor = walker.schema_cursor().clone(); let mut input_cursor = walker.input_cursor().clone(); @@ -320,10 +319,7 @@ impl ValidatorImpl for RepeatedMatcherParagraphVsParagraphValidator { let input_paragraph_text = get_node_text(&input_cursor.node(), walker.input_str()); - match matcher.match_str(input_paragraph_text) { - Some(matched) => matches.push(matched), - None => {} - } + if let Some(matched) = matcher.match_str(input_paragraph_text) { matches.push(matched) } let prev_sibling = input_cursor.clone(); if input_cursor.goto_next_sibling() && is_paragraph_node(&input_cursor.node()) { diff --git a/src/mdschema/validator/node_walker/validators/links.rs b/src/mdschema/validator/node_walker/validators/links.rs index 25d3dd7..aeb9f85 100644 --- a/src/mdschema/validator/node_walker/validators/links.rs +++ b/src/mdschema/validator/node_walker/validators/links.rs @@ -274,8 +274,8 @@ fn validate_link_destination( } } - if let Some(matcher_result) = extract_matcher_from_curly_delineated_text(input_text) { - if let Ok(matcher) = matcher_result { + if let Some(matcher_result) = extract_matcher_from_curly_delineated_text(input_text) + && let Ok(matcher) = matcher_result { if let Some(matched_str) = matcher.match_str(schema_text) { if let Some(id) = matcher.id() { result.set_match(id, json!(matched_str)); @@ -294,7 +294,6 @@ fn validate_link_destination( return result; } - } let text_result = compare_text_contents( schema_str, diff --git a/src/mdschema/validator/node_walker/validators/lists.rs b/src/mdschema/validator/node_walker/validators/lists.rs index 2e9d3d0..7df7dba 100644 --- 
a/src/mdschema/validator/node_walker/validators/lists.rs +++ b/src/mdschema/validator/node_walker/validators/lists.rs @@ -307,7 +307,7 @@ impl ValidatorImpl for ListVsListValidator { // If there are more items to validate AT THE SAME LEVEL, recurse to // validate them. We now use the *next* schema node too. if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { - let next_result = ListVsListValidator::default() + let next_result = ListVsListValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&next_result); } @@ -326,7 +326,7 @@ impl ValidatorImpl for ListVsListValidator { schema_cursor.node().kind() ); - let next_result = ListVsListValidator::default() + let next_result = ListVsListValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); // We need to be able to capture errors that happen in the recursive call result.join_errors(next_result.errors()); @@ -489,7 +489,7 @@ impl ValidatorImpl for ListVsListValidator { input_cursor.goto_first_child(); schema_cursor.goto_first_child(); - let deeper_result = ListVsListValidator::default() + let deeper_result = ListVsListValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&deeper_result); } @@ -498,7 +498,7 @@ impl ValidatorImpl for ListVsListValidator { // Recurse on next sibling if available! 
if schema_cursor.goto_next_sibling() && input_cursor.goto_next_sibling() { trace!("Moving to next sibling list items for continued validation"); - let new_matches = ListVsListValidator::default() + let new_matches = ListVsListValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&new_matches); } else { @@ -549,7 +549,7 @@ fn validate_list_item_contents_vs_list_item_contents( input_str: &str, got_eof: bool, ) -> (ValidationResult, bool) { - let mut result = ValidationResult::from_cursors(&schema_cursor, &input_cursor); + let mut result = ValidationResult::from_cursors(schema_cursor, input_cursor); let mut schema_cursor = schema_cursor.clone(); let mut input_cursor = input_cursor.clone(); @@ -885,7 +885,7 @@ mod tests { .unwrap() .unwrap(); - assert_eq!(matcher.id(), Some("name".into())); + assert_eq!(matcher.id(), Some("name")); // MatcherType is now always a regex pattern assert!(!format!("{}", matcher.pattern()).is_empty()); } diff --git a/src/mdschema/validator/node_walker/validators/matchers.rs b/src/mdschema/validator/node_walker/validators/matchers.rs index 6e200e7..df612b0 100644 --- a/src/mdschema/validator/node_walker/validators/matchers.rs +++ b/src/mdschema/validator/node_walker/validators/matchers.rs @@ -285,7 +285,7 @@ impl ValidatorImpl for MatcherVsTextValidator { trace!( "Matcher did not match input string: pattern={}, input='{}'", - matcher.pattern().to_string(), + matcher.pattern(), input_after_prefix ); @@ -294,7 +294,7 @@ impl ValidatorImpl for MatcherVsTextValidator { schema_index: schema_cursor.descendant_index(), input_index: input_cursor_descendant_index, expected: matcher.pattern().to_string(), - actual: input_after_prefix.into(), + actual: input_after_prefix, kind: NodeContentMismatchKind::Matcher, }, )); @@ -318,7 +318,7 @@ impl ValidatorImpl for MatcherVsTextValidator { } // Delegate to the literal matcher validator - return LiteralMatcherVsTextualValidator::default() + return 
LiteralMatcherVsTextualValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } _ => result.add_error(ValidationError::SchemaError(SchemaError::MatcherError { @@ -880,7 +880,7 @@ mod tests { let walker = ValidatorWalker::from_cursors(&schema_cursor, schema_str, &input_cursor, input_str); - let result = TextualVsTextualValidator::default().validate(&walker, true); + let result = TextualVsTextualValidator.validate(&walker, true); assert!(result.errors().is_empty()); assert_eq!(result.value(), &json!({"test": "test"})); diff --git a/src/mdschema/validator/node_walker/validators/mod.rs b/src/mdschema/validator/node_walker/validators/mod.rs index 7aed4bc..b8c96aa 100644 --- a/src/mdschema/validator/node_walker/validators/mod.rs +++ b/src/mdschema/validator/node_walker/validators/mod.rs @@ -174,7 +174,7 @@ mod test_utils { self.print(); panic!(); #[allow(unreachable_code)] - return &mut self; + return self; } pub fn print(&mut self) -> &mut Self { diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validator/node_walker/validators/nodes.rs index 408bcad..9841f26 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs +++ b/src/mdschema/validator/node_walker/validators/nodes.rs @@ -55,20 +55,20 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida if both_are_textual_nodes(&schema_node, &input_node) { trace!("Both are textual nodes, validating text vs text"); - return TextualVsTextualValidator::default() + return TextualVsTextualValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are codeblock nodes else if both_are_codeblocks(&schema_node, &input_node) { - return CodeVsCodeValidator::default() + return CodeVsCodeValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } else if both_are_quotes(&schema_node, &input_node) { - return QuoteVsQuoteValidator::default() + return QuoteVsQuoteValidator 
.validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are tables else if both_are_tables(&schema_node, &input_node) { - return TableVsTableValidator::default() + return TableVsTableValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are textual containers @@ -78,19 +78,19 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida } // Both are textual nodes else if both_are_textual_nodes(&schema_node, &input_node) { - return TextualVsTextualValidator::default() + return TextualVsTextualValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are link nodes or image nodes else if both_are_link_nodes(&schema_node, &input_node) || both_are_image_nodes(&schema_node, &input_node) { - return LinkVsLinkValidator::default() + return LinkVsLinkValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are list nodes else if both_are_list_nodes(&schema_node, &input_node) { - return ListVsListValidator::default() + return ListVsListValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); } // Both are ruler nodes @@ -100,7 +100,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida // First, if they are headings, validate the headings themselves. 
trace!("Both are heading nodes, validating heading vs heading"); - let heading_result = HeadingVsHeadingValidator::default() + let heading_result = HeadingVsHeadingValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&heading_result); @@ -130,7 +130,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida input_cursor.goto_first_child(), ) { (true, true) => { - let new_result = NodeVsNodeValidator::default() + let new_result = NodeVsNodeValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&new_result); result.sync_cursor_pos(&schema_cursor, &input_cursor); @@ -180,7 +180,7 @@ fn validate_node_vs_node_impl(walker: &ValidatorWalker, got_eof: bool) -> Valida input_cursor.goto_next_sibling(), ) { (true, true) => { - let new_result = NodeVsNodeValidator::default() + let new_result = NodeVsNodeValidator .validate(&walker.with_cursors(&schema_cursor, &input_cursor), got_eof); result.join_other_result(&new_result); result.sync_cursor_pos(&schema_cursor, &input_cursor); @@ -325,7 +325,7 @@ mod tests { || result .value() .as_object() - .map_or(true, |obj| obj.is_empty()) + .is_none_or(|obj| obj.is_empty()) ); } diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validator/node_walker/validators/tables.rs index 06cb0d3..f95a5a9 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validator/node_walker/validators/tables.rs @@ -502,7 +502,7 @@ fn try_get_repeated_row_bounds( let full_row_str = get_node_text(&schema_cursor.node(), schema_str); // We are guaranteed there will be a cell at the very end that could be a // correct repeater if the cell does not end with "|" or ":" - if full_row_str.ends_with(|c| c == '|' || c == ':') { + if full_row_str.ends_with(['|', ':']) { return None; } diff --git a/src/mdschema/validator/node_walker/validators/textual.rs 
b/src/mdschema/validator/node_walker/validators/textual.rs index 03c3eb5..7019492 100644 --- a/src/mdschema/validator/node_walker/validators/textual.rs +++ b/src/mdschema/validator/node_walker/validators/textual.rs @@ -49,7 +49,7 @@ fn validate_textual_vs_textual_impl(walker: &ValidatorWalker, got_eof: bool) -> }; if current_node_is_code_node || current_node_is_text_node_and_next_node_code_node { - return MatcherVsTextValidator::default().validate(walker, got_eof); + return MatcherVsTextValidator.validate(walker, got_eof); } validate_textual_vs_textual_direct( @@ -93,12 +93,12 @@ pub(super) fn validate_textual_vs_textual_direct( compare_node_kinds_check!(schema_cursor, input_cursor, schema_str, input_str, result); - let is_partial_match = waiting_at_end(got_eof, input_str, &input_cursor); + let is_partial_match = waiting_at_end(got_eof, input_str, input_cursor); let text_result = compare_text_contents( schema_str, input_str, - &schema_cursor, - &input_cursor, + schema_cursor, + input_cursor, is_partial_match, false, ); diff --git a/src/mdschema/validator/ts_utils.rs b/src/mdschema/validator/ts_utils.rs index a0bf5b1..1c6d731 100644 --- a/src/mdschema/validator/ts_utils.rs +++ b/src/mdschema/validator/ts_utils.rs @@ -16,7 +16,7 @@ pub fn get_node_text<'a, S: Into<&'a str>>(node: &Node, src: S) -> &'a str { let src_ref = src.into(); let node_str = node.utf8_text(src_ref.as_bytes()).unwrap(); - if is_table_cell_node(&node) || node.parent().is_some_and(|n| is_table_cell_node(&n)) { + if is_table_cell_node(node) || node.parent().is_some_and(|n| is_table_cell_node(&n)) { node_str.trim_start().trim_end() } else { node_str @@ -339,7 +339,6 @@ pub fn validate_str(schema: &str, input: &str) -> (serde_json::Value, Vec>(); let matches = validator.matches_so_far().clone(); @@ -478,22 +477,22 @@ mod tests { // Root node should be a document and should not be the last node assert_eq!(root_node.kind(), "document"); - assert_eq!(is_last_node(input, &root_node), false); + 
assert!(!is_last_node(input, &root_node)); // First child is the heading, which is not the last node let first_child = root_node.child(0).unwrap(); assert_eq!(first_child.kind(), "atx_heading"); - assert_eq!(is_last_node(input, &first_child), false); + assert!(!is_last_node(input, &first_child)); // Last child is the paragraph, but it's not the deepest node let last_child = root_node.child(root_node.named_child_count() - 1).unwrap(); assert_eq!(last_child.kind(), "paragraph"); - assert_eq!(is_last_node(input, &last_child), false); + assert!(!is_last_node(input, &last_child)); // Text node is the deepest, rightmost node that ends at the input end let text_node = last_child.child(0).unwrap(); assert_eq!(text_node.kind(), "text"); - assert_eq!(is_last_node(input, &text_node), true); + assert!(is_last_node(input, &text_node)); } #[test] diff --git a/src/mdschema/validator/validator.rs b/src/mdschema/validator/validator.rs index 1c56606..bbf250a 100644 --- a/src/mdschema/validator/validator.rs +++ b/src/mdschema/validator/validator.rs @@ -177,7 +177,7 @@ impl Validator { farthest_reached_pos.walk_cursors_to_pos(&mut schema_cursor, &mut input_cursor); let walker = ValidatorWalker::new(schema_cursor, &schema_str, input_cursor, &input_str); - NodeVsNodeValidator::default().validate(&walker, got_eof) + NodeVsNodeValidator.validate(&walker, got_eof) }; self.push_validation_result(validation_result); @@ -295,7 +295,7 @@ mod tests { validator .input_tree .root_node() - .utf8_text(&validator.last_input_str().as_bytes()) + .utf8_text(validator.last_input_str().as_bytes()) .expect("Failed to get input text"), "Updated input" ); @@ -1083,15 +1083,13 @@ Content for section 3."#; let mut validator = Validator::new(schema, "", false).expect("Failed to create validator"); // Incrementally add content in logical chunks - let chunks = vec![ - "# Title\n\n", + let chunks = ["# Title\n\n", "# Title\n\n## Section 1\n\n", "# Title\n\n## Section 1\n\nContent for section 1.\n\n", "# 
Title\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\n", "# Title\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\nContent for section 2.\n\n", "# Title\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\nContent for section 2.\n\n## Section 3\n\n", - input_complete, - ]; + input_complete]; for (i, chunk) in chunks.iter().enumerate() { let is_eof = i == chunks.len() - 1; From 11d5c985b3de139ffe4d0138c94d6ff0c4613211 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 19:02:03 -0500 Subject: [PATCH 27/33] update the example --- README.md | 2 +- .../node_walker/helpers/expected_input_nodes.rs | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 30a0a37..01b41d9 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ We plan to eventually support converting a Markdown schema into a JSON schema de You can find the full docs [here](https://404wolf.github.io/mdvalidate/)! -## Kitchen Sink Example (current + planned) +## Kitchen Sink Example Schema: diff --git a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs b/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs index e281ac1..669ea36 100644 --- a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs +++ b/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs @@ -8,9 +8,15 @@ use crate::mdschema::validator::{ matcher_extras::{get_after_extras, get_all_extras}, }, ts_types::*, - ts_utils::{get_next_node, get_node_text}, + ts_utils::get_next_node, }; +/// Get node text without trimming, even for table cells. +/// This is needed for structural analysis where we need to see the actual content. +fn get_node_text_raw<'a>(node: &tree_sitter::Node, src: &'a str) -> &'a str { + node.utf8_text(src.as_bytes()).unwrap() +} + /// Determine the number of nodes we expect in some corresponding input string. 
/// /// # Algorithm @@ -175,7 +181,7 @@ fn text_after_matcher<'a>( return Ok(""); } - let next_node_str = get_node_text(&next_node, schema_str); + let next_node_str = get_node_text_raw(&next_node, schema_str); Ok(get_after_extras(next_node_str).unwrap_or("")) } @@ -202,7 +208,7 @@ fn extras_after_matcher<'a>( match get_next_node(schema_cursor) { Some(next_node) => { - let next_node_str = get_node_text(&next_node, schema_str); + let next_node_str = get_node_text_raw(&next_node, schema_str); Ok(get_all_extras(next_node_str).unwrap_or("")) } From f1f3a91e18227319cd91bc217d486c74cb5e99b7 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 19:29:50 -0500 Subject: [PATCH 28/33] make clippy happy --- .github/workflows/tests.yaml | 2 + examples/simple.rs | 2 +- src/cmd.rs | 89 ++++++---- src/mdschema/mod.rs | 4 +- .../{validator => validation}/errors.rs | 16 +- .../matchers}/matcher.rs | 16 +- .../matchers}/matcher_extras.rs | 4 +- .../matcher => validation/matchers}/mod.rs | 0 src/mdschema/{validator => validation}/mod.rs | 4 +- .../node_pos_pair.rs | 4 +- .../{validator => validation}/ts_types.rs | 8 +- .../{validator => validation}/ts_utils.rs | 161 +++++++++--------- .../{validator => validation}/utils.rs | 80 +++++---- .../{validator => validation}/validator.rs | 19 ++- .../validator_walker.rs | 0 .../helpers/check_repeating_matchers.rs | 8 +- .../walkers}/helpers/compare_node_kinds.rs | 8 +- .../walkers}/helpers/compare_text_contents.rs | 12 +- .../count_non_literal_matchers_in_children.rs | 12 +- .../walkers}/helpers/curly_matchers.rs | 2 +- .../walkers}/helpers/expected_input_nodes.rs | 41 ++--- .../walkers}/helpers/mod.rs | 0 .../walkers}/helpers/node_children_lengths.rs | 2 +- .../node_walker => validation/walkers}/mod.rs | 2 +- .../walkers}/node_walker.rs | 6 +- .../walkers}/utils.rs | 6 +- .../walkers}/validation_result.rs | 10 +- .../walkers}/validators/code.rs | 79 +++++---- .../walkers}/validators/containers.rs | 20 +-- 
.../walkers}/validators/headings.rs | 18 +- .../walkers}/validators/links.rs | 22 +-- .../walkers}/validators/lists.rs | 18 +- .../walkers}/validators/matchers.rs | 28 +-- .../walkers}/validators/mod.rs | 8 +- .../walkers}/validators/nodes.rs | 32 ++-- .../walkers}/validators/quotes.rs | 10 +- .../walkers}/validators/tables.rs | 22 +-- .../walkers}/validators/textual.rs | 14 +- tests/code.rs | 2 +- tests/headings.rs | 2 +- tests/helpers/mod.rs | 4 +- tests/links.rs | 2 +- tests/lists.rs | 2 +- tests/matchers.rs | 2 +- tests/misc.rs | 2 +- tests/quotes.rs | 2 +- tests/rulers.rs | 2 +- tests/tables.rs | 2 +- tests/textual.rs | 2 +- tests/textual_container.rs | 2 +- 50 files changed, 422 insertions(+), 393 deletions(-) rename src/mdschema/{validator => validation}/errors.rs (98%) rename src/mdschema/{validator/matcher => validation/matchers}/matcher.rs (98%) rename src/mdschema/{validator/matcher => validation/matchers}/matcher_extras.rs (98%) rename src/mdschema/{validator/matcher => validation/matchers}/mod.rs (100%) rename src/mdschema/{validator => validation}/mod.rs (77%) rename src/mdschema/{validator => validation}/node_pos_pair.rs (94%) rename src/mdschema/{validator => validation}/ts_types.rs (97%) rename src/mdschema/{validator => validation}/ts_utils.rs (84%) rename src/mdschema/{validator => validation}/utils.rs (66%) rename src/mdschema/{validator => validation}/validator.rs (99%) rename src/mdschema/{validator => validation}/validator_walker.rs (100%) rename src/mdschema/{validator/node_walker => validation/walkers}/helpers/check_repeating_matchers.rs (90%) rename src/mdschema/{validator/node_walker => validation/walkers}/helpers/compare_node_kinds.rs (95%) rename src/mdschema/{validator/node_walker => validation/walkers}/helpers/compare_text_contents.rs (96%) rename src/mdschema/{validator/node_walker => validation/walkers}/helpers/count_non_literal_matchers_in_children.rs (95%) rename src/mdschema/{validator/node_walker => 
validation/walkers}/helpers/curly_matchers.rs (96%) rename src/mdschema/{validator/node_walker => validation/walkers}/helpers/expected_input_nodes.rs (94%) rename src/mdschema/{validator/node_walker => validation/walkers}/helpers/mod.rs (100%) rename src/mdschema/{validator/node_walker => validation/walkers}/helpers/node_children_lengths.rs (98%) rename src/mdschema/{validator/node_walker => validation/walkers}/mod.rs (90%) rename src/mdschema/{validator/node_walker => validation/walkers}/node_walker.rs (96%) rename src/mdschema/{validator/node_walker => validation/walkers}/utils.rs (88%) rename src/mdschema/{validator/node_walker => validation/walkers}/validation_result.rs (95%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/code.rs (83%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/containers.rs (97%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/headings.rs (95%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/links.rs (95%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/lists.rs (99%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/matchers.rs (98%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/mod.rs (97%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/nodes.rs (93%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/quotes.rs (91%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/tables.rs (98%) rename src/mdschema/{validator/node_walker => validation/walkers}/validators/textual.rs (90%) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index f2d50fd..52f7c02 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -40,6 +40,8 @@ jobs: run: nix develop --command cargo build - name: Run tests run: nix develop --command cargo 
test + - name: Run clippy + run: nix develop --command cargo clippy -- -D warnings - name: Save Cargo cache if: always() uses: actions/cache/save@v4 diff --git a/examples/simple.rs b/examples/simple.rs index 4f4a14d..59c5378 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -1,4 +1,4 @@ -use mdvalidate::{Validator, mdschema::validator::errors::pretty_print_error}; +use mdvalidate::{Validator, mdschema::validation::errors::pretty_print_error}; fn main() { // Define a simple schema: a heading with a name and a list diff --git a/src/cmd.rs b/src/cmd.rs index b399af4..1742cb9 100644 --- a/src/cmd.rs +++ b/src/cmd.rs @@ -1,4 +1,4 @@ -use crate::mdschema::validator::{ +use crate::mdschema::validation::{ errors::{ ParserError, PrettyPrintError, ValidationError, debug_print_error, pretty_print_error, }, @@ -20,6 +20,14 @@ pub enum ProcessingError { Utf8(std::str::Utf8Error), } +#[derive(Debug)] +pub struct ProcessingResult { + pub errors: Vec, + pub matches: Value, + pub validator: Validator, + pub input_str: String, +} + impl From for ProcessingError { fn from(error: std::io::Error) -> Self { ProcessingError::Io(error) @@ -76,50 +84,57 @@ impl From for ProcessingError { } } -pub fn process( - schema_str: &String, - input: &mut R, - fast_fail: bool, -) -> Result<((Vec, Value), Validator, String), ProcessingError> { - let buffer_size = get_buffer_size(); +impl ProcessingResult { + pub fn process( + schema_str: &str, + input: &mut R, + fast_fail: bool, + ) -> Result { + let buffer_size = get_buffer_size(); - let mut input_str = String::new(); - let mut buffer = vec![0; buffer_size]; + let mut input_str = String::new(); + let mut buffer = vec![0; buffer_size]; - let mut validator = Validator::new_incomplete(schema_str.as_str(), input_str.as_str()) - .ok_or(ValidationError::ValidatorCreationFailed)?; + let mut validator = Validator::new_incomplete(schema_str, input_str.as_str()) + .ok_or(ValidationError::ValidatorCreationFailed)?; - loop { - let bytes_read = 
input.read(&mut buffer)?; + loop { + let bytes_read = input.read(&mut buffer)?; - // If we're done reading, mark EOF - if bytes_read == 0 { - validator.read_final_input(&input_str)?; - validator.validate(); + // If we're done reading, mark EOF + if bytes_read == 0 { + validator.read_final_input(&input_str)?; + validator.validate(); - break; - } + break; + } - let new_text = std::str::from_utf8(&buffer[..bytes_read])?; - input_str.push_str(new_text); + let new_text = std::str::from_utf8(&buffer[..bytes_read])?; + input_str.push_str(new_text); - validator.read_more_input(&input_str)?; - validator.validate(); + validator.read_more_input(&input_str)?; + validator.validate(); - // Check for fast-fail AFTER validation - if fast_fail && validator.errors_so_far().count() > 0 { - break; + // Check for fast-fail AFTER validation + if fast_fail && validator.errors_so_far().count() > 0 { + break; + } } - } - let errors: Vec<_> = validator.errors_so_far().cloned().collect(); - let matches = validator.matches_so_far().clone(); + let errors: Vec<_> = validator.errors_so_far().cloned().collect(); + let matches = validator.matches_so_far().clone(); - Ok(((errors, matches), validator, input_str)) + Ok(ProcessingResult { + errors, + matches, + validator, + input_str, + }) + } } pub fn process_stdio( - schema_str: &String, + schema_str: &str, input: &mut R, output: &mut Option<&mut W>, filename: &str, @@ -127,7 +142,12 @@ pub fn process_stdio( quiet: bool, debug_mode: bool, ) -> Result<((Vec, Value), bool), ProcessingError> { - let ((errors, matches), validator, _input_str) = process(schema_str, input, fast_fail)?; + let ProcessingResult { + errors, + matches, + validator, + input_str: _input_str, + } = ProcessingResult::process(schema_str, input, fast_fail)?; let mut errored = false; if errors.is_empty() { @@ -175,9 +195,10 @@ mod tests { mut input: R, fast_fail: bool, ) -> (Vec, Value) { - let ((errors, matches), _validator, _) = process(schema, &mut input, fast_fail) + let result = 
ProcessingResult::process(schema, &mut input, fast_fail) .expect("Validation should complete without errors"); - (errors, matches) + + (result.errors, result.matches) } /// A custom reader that only reads a specific number of bytes at a time diff --git a/src/mdschema/mod.rs b/src/mdschema/mod.rs index 5b3449b..0124f37 100644 --- a/src/mdschema/mod.rs +++ b/src/mdschema/mod.rs @@ -1,3 +1,3 @@ -pub mod validator; +pub mod validation; -pub use validator::validator::Validator; +pub use validation::validator::Validator; diff --git a/src/mdschema/validator/errors.rs b/src/mdschema/validation/errors.rs similarity index 98% rename from src/mdschema/validator/errors.rs rename to src/mdschema/validation/errors.rs index d8750af..e5adff5 100644 --- a/src/mdschema/validator/errors.rs +++ b/src/mdschema/validation/errors.rs @@ -1,5 +1,5 @@ -use crate::mdschema::validator::{ - matcher::{ +use crate::mdschema::validation::{ + matchers::{ matcher::*, matcher_extras::{MatcherExtras, MatcherExtrasError}, }, @@ -9,8 +9,8 @@ use ariadne::{Color, Label, Report, ReportKind, Source}; use std::fmt; use tree_sitter::TreeCursor; -use crate::mdschema::validator::{ - node_walker::utils::pretty_print_cursor_pair, +use crate::mdschema::validation::{ + walkers::utils::pretty_print_cursor_pair, ts_utils::{find_node_by_index, walk_to_root}, }; @@ -35,11 +35,11 @@ macro_rules! invariant_violation { ($schema_cursor:expr, $input_cursor:expr, $message:expr $(, $($args:tt)*)?) 
=> {{ #[cfg(feature = "invariant_violations")] { - let cursor_info = $crate::mdschema::validator::node_walker::utils::pretty_print_cursor_pair( + let cursor_info = $crate::mdschema::validation::walkers::utils::pretty_print_cursor_pair( $schema_cursor, $input_cursor, ); - let error_msg = $crate::mdschema::validator::errors::invariant_violation_message( + let error_msg = $crate::mdschema::validation::errors::invariant_violation_message( Some(($schema_cursor, $input_cursor)), format!($message $(, $($args)*)?), module_path!(), @@ -55,7 +55,7 @@ macro_rules! invariant_violation { ($message:expr $(, $($args:tt)*)?) => {{ #[cfg(feature = "invariant_violations")] { - let error_msg = $crate::mdschema::validator::errors::invariant_violation_message( + let error_msg = $crate::mdschema::validation::errors::invariant_violation_message( None, format!($message $(, $($args)*)?), module_path!(), @@ -958,7 +958,7 @@ fn node_content_by_index<'a>( #[cfg(test)] mod tests { - use crate::mdschema::validator::ts_utils::new_markdown_parser; + use crate::mdschema::validation::ts_utils::new_markdown_parser; use super::*; diff --git a/src/mdschema/validator/matcher/matcher.rs b/src/mdschema/validation/matchers/matcher.rs similarity index 98% rename from src/mdschema/validator/matcher/matcher.rs rename to src/mdschema/validation/matchers/matcher.rs index 68f4e7e..d9793d7 100644 --- a/src/mdschema/validator/matcher/matcher.rs +++ b/src/mdschema/validation/matchers/matcher.rs @@ -1,13 +1,13 @@ #![allow(dead_code)] -use crate::{invariant_violation, mdschema::validator::matcher::matcher_extras::MatcherExtras}; +use crate::{invariant_violation, mdschema::validation::matchers::matcher_extras::MatcherExtras}; use core::fmt; use regex::Regex; use std::{collections::HashSet, sync::LazyLock}; use tree_sitter::TreeCursor; -use crate::mdschema::validator::{ - matcher::matcher_extras::{MatcherExtrasError, partition_at_special_chars}, +use crate::mdschema::validation::{ + 
matchers::matcher_extras::{MatcherExtrasError, partition_at_special_chars}, ts_types::*, ts_utils::{get_next_node, get_node_and_next_node, get_node_text}, }; @@ -342,11 +342,7 @@ impl fmt::Display for Matcher { MatcherKind::Regex(regex) => { let regex_str = regex.as_str(); // The regex is stored as "^", so remove the leading ^ - let pattern_str = if regex_str.starts_with('^') { - ®ex_str[1..] - } else { - regex_str - }; + let pattern_str = regex_str.strip_prefix('^').unwrap_or(regex_str); match &self.id { Some(id) => write!(f, "{}:/{}/", id, pattern_str), @@ -421,8 +417,8 @@ pub fn extract_text_matcher(cursor: &TreeCursor, str: &str) -> Result = LazyLock::new(|| Regex::new(r"\{(\d*),(\d*)\}").unwrap()); @@ -98,7 +98,7 @@ impl std::fmt::Display for MatcherExtrasError { /// # Examples /// /// ``` -/// use mdvalidate::mdschema::validator::matcher::matcher_extras::MatcherExtras; +/// use mdvalidate::mdschema::validation::matcher::matcher_extras::MatcherExtras; /// /// // Matcher with repeat limits: `name:/\w+/`{2,5} /// let extras = MatcherExtras::try_new(Some("{2,5}")).unwrap(); diff --git a/src/mdschema/validator/matcher/mod.rs b/src/mdschema/validation/matchers/mod.rs similarity index 100% rename from src/mdschema/validator/matcher/mod.rs rename to src/mdschema/validation/matchers/mod.rs diff --git a/src/mdschema/validator/mod.rs b/src/mdschema/validation/mod.rs similarity index 77% rename from src/mdschema/validator/mod.rs rename to src/mdschema/validation/mod.rs index cfeb164..7e40e37 100644 --- a/src/mdschema/validator/mod.rs +++ b/src/mdschema/validation/mod.rs @@ -1,7 +1,7 @@ pub mod errors; -pub mod matcher; +pub mod matchers; pub(crate) mod node_pos_pair; -pub(crate) mod node_walker; +pub(crate) mod walkers; pub(crate) mod ts_types; pub(crate) mod ts_utils; mod utils; diff --git a/src/mdschema/validator/node_pos_pair.rs b/src/mdschema/validation/node_pos_pair.rs similarity index 94% rename from src/mdschema/validator/node_pos_pair.rs rename to 
src/mdschema/validation/node_pos_pair.rs index c5ccda6..e71a782 100644 --- a/src/mdschema/validator/node_pos_pair.rs +++ b/src/mdschema/validation/node_pos_pair.rs @@ -28,7 +28,7 @@ impl NodePosPair { } /// Convert the `NodePosPair` to a tuple of schema and input indexes. - pub fn to_pos(&self) -> (usize, usize) { + pub fn as_pos(&self) -> (usize, usize) { (self.schema_index, self.input_index) } @@ -45,7 +45,7 @@ impl NodePosPair { schema_cursor: &mut TreeCursor, input_cursor: &mut TreeCursor, ) { - let (schema_pos, input_pos) = self.to_pos(); + let (schema_pos, input_pos) = self.as_pos(); schema_cursor.goto_descendant(schema_pos); input_cursor.goto_descendant(input_pos); diff --git a/src/mdschema/validator/ts_types.rs b/src/mdschema/validation/ts_types.rs similarity index 97% rename from src/mdschema/validator/ts_types.rs rename to src/mdschema/validation/ts_types.rs index fe1bcc2..7af5ba2 100644 --- a/src/mdschema/validator/ts_types.rs +++ b/src/mdschema/validation/ts_types.rs @@ -195,13 +195,9 @@ node_predicate_pair!( /// Check if both nodes are top-level nodes (document or heading). 
pub fn both_are_matching_top_level_nodes(schema_node: &Node, input_node: &Node) -> bool { - if schema_node.kind() != input_node.kind() { - return false; - } - match schema_node.kind() { - "document" => true, - "atx_heading" => true, + _ if schema_node.kind() != input_node.kind() => false, + "document" | "atx_heading" => true, _ => false, } } diff --git a/src/mdschema/validator/ts_utils.rs b/src/mdschema/validation/ts_utils.rs similarity index 84% rename from src/mdschema/validator/ts_utils.rs rename to src/mdschema/validation/ts_utils.rs index 1c6d731..91eef0e 100644 --- a/src/mdschema/validator/ts_utils.rs +++ b/src/mdschema/validation/ts_utils.rs @@ -5,8 +5,8 @@ use tree_sitter::{Node, Parser, Tree, TreeCursor}; use tree_sitter_markdown::language; #[cfg(feature = "invariant_violations")] -use crate::mdschema::validator::ts_types::*; -use crate::mdschema::validator::{errors::ValidationError, validator::ValidatorState}; +use crate::mdschema::validation::ts_types::*; +use crate::mdschema::validation::{errors::ValidationError, validator::ValidatorState}; use regex::Regex; use std::sync::LazyLock; @@ -193,90 +193,97 @@ pub fn has_single_code_child(schema_cursor: &TreeCursor) -> bool { code_child_count == 1 } -/// Extract the language and body of a codeblock. -/// -/// # Arguments -/// -/// * `cursor`: The cursor pointing to the codeblock node. -/// * `src`: The source text of the document. -/// -/// # Returns -/// -/// An `Option` containing: -/// - The optional language tuple: `(language_string, descendant_index)` if the language text is present -/// - The body tuple: `(body_string, descendant_index)` of the code content -/// Where `descendant_index` is the index of the descendant node that contains the language or body text. -/// -/// Returns `None` if the codeblock is invalid or it isn't a codeblock to begin with. 
-pub fn extract_codeblock_contents( - cursor: &TreeCursor, - src: &str, -) -> Result, (String, usize))>, ValidationError> { - // A codeblock looks like this: - // - // └── (fenced_code_block) - // ├── (info_string)? // only present when there is a language - // │ └── (text) - // └── (code_fence_content) - // └── (text) - // - // └── (fenced_code_block) - // └── (code_fence_content) - // └── (text) - - let mut cursor = cursor.clone(); - if cursor.node().kind() != "fenced_code_block" { - return Ok(None); - } - - // Move to the first child and determine if it's an info_string or the content - if !cursor.goto_first_child() { - return Ok(None); - } +/// Represents the extracted contents of a markdown codeblock. +pub struct CodeblockContents { + pub lang: Option<(String, usize)>, + pub code: (String, usize), +} - let mut language: Option<(String, usize)> = None; +impl CodeblockContents { + /// Extract the language and body of a codeblock. + /// + /// # Arguments + /// + /// * `cursor`: The cursor pointing to the codeblock node. + /// * `src`: The source text of the document. + /// + /// # Returns + /// + /// An `Option` containing `CodeblockContents` with: + /// + /// - The optional language tuple: `(language_string, descendant_index)` if the language text is present + /// - The body tuple: `(body_string, descendant_index)` of the code content + /// + /// Where `descendant_index` is the index of the descendant node that contains the language or body text. + /// + /// Returns `None` if the codeblock is invalid or it isn't a codeblock to begin with. + pub fn try_from_cursor(cursor: &TreeCursor, src: &str) -> Result, ValidationError> { + // A codeblock looks like this: + // + // └── (fenced_code_block) + // ├── (info_string)? 
// only present when there is a language + // │ └── (text) + // └── (code_fence_content) + // └── (text) + // + // └── (fenced_code_block) + // └── (code_fence_content) + // └── (text) + + let mut cursor = cursor.clone(); + if cursor.node().kind() != "fenced_code_block" { + return Ok(None); + } - if cursor.node().kind() == "info_string" { - // Extract language from info_string -> text - if !cursor.goto_first_child() || cursor.node().kind() != "text" { + // Move to the first child and determine if it's an info_string or the content + if !cursor.goto_first_child() { return Ok(None); } - language = Some(( - get_node_text(&cursor.node(), src).to_string(), - cursor.descendant_index(), - )); - // Go back to info_string, then to its sibling: code_fence_content - if !cursor.goto_parent() || !cursor.goto_next_sibling() { + let mut language: Option<(String, usize)> = None; + + if cursor.node().kind() == "info_string" { + // Extract language from info_string -> text + if !cursor.goto_first_child() || cursor.node().kind() != "text" { + return Ok(None); + } + language = Some(( + get_node_text(&cursor.node(), src).to_string(), + cursor.descendant_index(), + )); + + // Go back to info_string, then to its sibling: code_fence_content + if !cursor.goto_parent() || !cursor.goto_next_sibling() { + return Ok(None); + } + } else if cursor.node().kind() != "code_fence_content" { + // First child is neither info_string nor code_fence_content -> invalid layout return Ok(None); } - } else if cursor.node().kind() != "code_fence_content" { - // First child is neither info_string nor code_fence_content -> invalid layout - return Ok(None); - } - // At this point, cursor must be at code_fence_content - #[cfg(feature = "invariant_violations")] - if cursor.node().kind() != "code_fence_content" { - invariant_violation!( - &cursor, - &cursor, - "expected code_fence_content while extracting code block" - ); - } + // At this point, cursor must be at code_fence_content + #[cfg(feature = 
"invariant_violations")] + if cursor.node().kind() != "code_fence_content" { + invariant_violation!( + &cursor, + &cursor, + "expected code_fence_content while extracting code block" + ); + } - // Get the full text from code_fence_content node itself, not just the first child - let code_fence_node = cursor.node(); - let text = get_node_text(&code_fence_node, src); + // Get the full text from code_fence_content node itself, not just the first child + let code_fence_node = cursor.node(); + let text = get_node_text(&code_fence_node, src); - // Navigate to first text child to get its descendant_index - if !cursor.goto_first_child() || cursor.node().kind() != "text" { - return Ok(None); - } + // Navigate to first text child to get its descendant_index + if !cursor.goto_first_child() || cursor.node().kind() != "text" { + return Ok(None); + } - let body = (text.to_string(), cursor.descendant_index()); + let body = (text.to_string(), cursor.descendant_index()); - Ok(Some((language, body))) + Ok(Some(CodeblockContents { lang: language, code: body })) + } } /// Walk from a list_item node to its content paragraph. 
@@ -332,7 +339,7 @@ pub fn has_next_sibling(cursor: &TreeCursor) -> bool { #[allow(dead_code)] pub fn validate_str(schema: &str, input: &str) -> (serde_json::Value, Vec) { - use crate::mdschema::validator::validator::Validator; + use crate::mdschema::validation::validator::Validator; let mut validator = Validator::new_complete(schema, input).unwrap(); validator.validate(); @@ -350,8 +357,8 @@ pub fn validate_str(schema: &str, input: &str) -> (serde_json::Value, Vec Tree { #[cfg(test)] mod tests { - use crate::mdschema::validator::ts_utils::extract_codeblock_contents; - - use super::*; + use super::{join_values, parse_markdown_and_get_tree, Value}; + use crate::mdschema::validation::ts_utils::CodeblockContents; #[test] fn test_extract_codeblock_contents() { @@ -57,40 +56,44 @@ mod tests { let tree = parse_markdown_and_get_tree(input); let mut cursor = tree.walk(); cursor.goto_first_child(); - assert_eq!( - extract_codeblock_contents(&cursor, input).unwrap(), - Some((None, ("code".into(), 3))) - ); + let contents = CodeblockContents::try_from_cursor(&cursor, input) + .unwrap() + .expect("expected codeblock contents"); + assert_eq!(contents.lang, None); + assert_eq!(contents.code, ("code".into(), 3)); // With language, 3 backticks let input = "```rust\ncode\n```\n"; let tree = parse_markdown_and_get_tree(input); let mut cursor = tree.walk(); cursor.goto_first_child(); - assert_eq!( - extract_codeblock_contents(&cursor, input).unwrap(), - Some((Some(("rust".into(), 3)), ("code".into(), 5))) - ); + let contents = CodeblockContents::try_from_cursor(&cursor, input) + .unwrap() + .expect("expected codeblock contents"); + assert_eq!(contents.lang, Some(("rust".into(), 3))); + assert_eq!(contents.code, ("code".into(), 5)); // Without language, 4 backticks let input = "````\ncode\n````\n"; let tree = parse_markdown_and_get_tree(input); let mut cursor = tree.walk(); cursor.goto_first_child(); - assert_eq!( - extract_codeblock_contents(&cursor, input).unwrap(), - Some((None, 
("code".into(), 3))) - ); + let contents = CodeblockContents::try_from_cursor(&cursor, input) + .unwrap() + .expect("expected codeblock contents"); + assert_eq!(contents.lang, None); + assert_eq!(contents.code, ("code".into(), 3)); // With language, 4 backticks let input = "````rust\ncode\n````\n"; let tree = parse_markdown_and_get_tree(input); let mut cursor = tree.walk(); cursor.goto_first_child(); - assert_eq!( - extract_codeblock_contents(&cursor, input).unwrap(), - Some((Some(("rust".into(), 3)), ("code".into(), 5))) - ); + let contents = CodeblockContents::try_from_cursor(&cursor, input) + .unwrap() + .expect("expected codeblock contents"); + assert_eq!(contents.lang, Some(("rust".into(), 3))); + assert_eq!(contents.code, ("code".into(), 5)); } #[test] @@ -100,22 +103,24 @@ mod tests { let tree = parse_markdown_and_get_tree(input); let mut cursor = tree.walk(); cursor.goto_first_child(); - assert_eq!( - extract_codeblock_contents(&cursor, input).unwrap(), - Some((None, ("line1\nline2\nline3".into(), 3))) - ); + let contents = CodeblockContents::try_from_cursor(&cursor, input) + .unwrap() + .expect("expected codeblock contents"); + assert_eq!(contents.lang, None); + assert_eq!(contents.code, ("line1\nline2\nline3".into(), 3)); // Multiline code with language let input = "```rust\nfn main() {\n println!(\"Hello\");\n}\n```\n"; let tree = parse_markdown_and_get_tree(input); let mut cursor = tree.walk(); cursor.goto_first_child(); + let contents = CodeblockContents::try_from_cursor(&cursor, input) + .unwrap() + .expect("expected codeblock contents"); + assert_eq!(contents.lang, Some(("rust".into(), 3))); assert_eq!( - extract_codeblock_contents(&cursor, input).unwrap(), - Some(( - Some(("rust".into(), 3)), - ("fn main() {\n println!(\"Hello\");\n}".into(), 5) - )) + contents.code, + ("fn main() {\n println!(\"Hello\");\n}".into(), 5) ); // Multiline code with indentation @@ -123,15 +128,16 @@ mod tests { let tree = parse_markdown_and_get_tree(input); let mut 
cursor = tree.walk(); cursor.goto_first_child(); + let contents = CodeblockContents::try_from_cursor(&cursor, input) + .unwrap() + .expect("expected codeblock contents"); + assert_eq!(contents.lang, Some(("python".into(), 3))); assert_eq!( - extract_codeblock_contents(&cursor, input).unwrap(), - Some(( - Some(("python".into(), 3)), - ( - "def hello():\n print(\"world\")\n return True".into(), - 5 - ) - )) + contents.code, + ( + "def hello():\n print(\"world\")\n return True".into(), + 5 + ) ); } diff --git a/src/mdschema/validator/validator.rs b/src/mdschema/validation/validator.rs similarity index 99% rename from src/mdschema/validator/validator.rs rename to src/mdschema/validation/validator.rs index bbf250a..c8dde85 100644 --- a/src/mdschema/validator/validator.rs +++ b/src/mdschema/validation/validator.rs @@ -2,10 +2,10 @@ use line_col::LineColLookup; use serde_json::{Map, Value}; use tree_sitter::{InputEdit, Point, Tree}; -use crate::mdschema::validator::{ +use crate::mdschema::validation::{ errors::{ParserError, ValidationError}, node_pos_pair::NodePosPair, - node_walker::{ + walkers::{ ValidationResult, validators::{Validator as ValidatorTrait, nodes::NodeVsNodeValidator}, }, @@ -16,6 +16,7 @@ use crate::mdschema::validator::{ /// A Validator implementation that uses a zipper tree approach to validate /// an input Markdown document against a markdown schema treesitter tree. +#[derive(Debug)] pub struct Validator { /// The schema tree, which does not change after initialization. schema_tree: Tree, @@ -255,7 +256,7 @@ impl ValidatorState for Validator { mod tests { use serde_json::json; - use crate::mdschema::validator::errors::{SchemaError, SchemaViolationError}; + use crate::mdschema::validation::errors::{SchemaError, SchemaViolationError}; use super::*; @@ -432,7 +433,7 @@ fooobar ValidationError::SchemaViolation(SchemaViolationError::MalformedNodeStructure { schema_index, input_index, - kind, + .. 
}) => { assert_eq!(*schema_index, 7); assert_eq!(*input_index, 5); @@ -1083,25 +1084,27 @@ Content for section 3."#; let mut validator = Validator::new(schema, "", false).expect("Failed to create validator"); // Incrementally add content in logical chunks - let chunks = ["# Title\n\n", + let chunks = [ + "# Title\n\n", "# Title\n\n## Section 1\n\n", "# Title\n\n## Section 1\n\nContent for section 1.\n\n", "# Title\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\n", "# Title\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\nContent for section 2.\n\n", "# Title\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\nContent for section 2.\n\n## Section 3\n\n", - input_complete]; + input_complete, + ]; for (i, chunk) in chunks.iter().enumerate() { let is_eof = i == chunks.len() - 1; - let indices_before = validator.farthest_reached_pos().to_pos(); + let indices_before = validator.farthest_reached_pos().as_pos(); validator .read_input(chunk, is_eof) .expect("Failed to read input"); validator.validate(); - let indices_after = validator.farthest_reached_pos().to_pos(); + let indices_after = validator.farthest_reached_pos().as_pos(); // Indices should advance (or stay the same if nothing new to validate) // They should NOT reset to 0 diff --git a/src/mdschema/validator/validator_walker.rs b/src/mdschema/validation/validator_walker.rs similarity index 100% rename from src/mdschema/validator/validator_walker.rs rename to src/mdschema/validation/validator_walker.rs diff --git a/src/mdschema/validator/node_walker/helpers/check_repeating_matchers.rs b/src/mdschema/validation/walkers/helpers/check_repeating_matchers.rs similarity index 90% rename from src/mdschema/validator/node_walker/helpers/check_repeating_matchers.rs rename to src/mdschema/validation/walkers/helpers/check_repeating_matchers.rs index 0ceb1ea..c1e190e 100644 --- a/src/mdschema/validator/node_walker/helpers/check_repeating_matchers.rs +++ 
b/src/mdschema/validation/walkers/helpers/check_repeating_matchers.rs @@ -1,7 +1,7 @@ use tree_sitter::TreeCursor; -use crate::mdschema::validator::{ - matcher::matcher::{Matcher, MatcherError}, +use crate::mdschema::validation::{ + matchers::matcher::{Matcher, MatcherError}, ts_types::*, }; @@ -38,8 +38,8 @@ pub fn check_repeating_matchers(schema_cursor: &TreeCursor, schema_str: &str) -> #[cfg(test)] mod tests { - use crate::mdschema::validator::{ - node_walker::helpers::check_repeating_matchers::check_repeating_matchers, + use crate::mdschema::validation::{ + walkers::helpers::check_repeating_matchers::check_repeating_matchers, ts_utils::parse_markdown, }; diff --git a/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs b/src/mdschema/validation/walkers/helpers/compare_node_kinds.rs similarity index 95% rename from src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs rename to src/mdschema/validation/walkers/helpers/compare_node_kinds.rs index 240e524..a7db116 100644 --- a/src/mdschema/validator/node_walker/helpers/compare_node_kinds.rs +++ b/src/mdschema/validation/walkers/helpers/compare_node_kinds.rs @@ -1,7 +1,7 @@ use tree_sitter::TreeCursor; -use crate::mdschema::validator::errors::{SchemaViolationError, ValidationError}; -use crate::mdschema::validator::ts_utils::{ +use crate::mdschema::validation::errors::{SchemaViolationError, ValidationError}; +use crate::mdschema::validation::ts_utils::{ extract_list_marker, get_heading_kind, is_ordered_list_marker, is_unordered_list_marker, }; @@ -123,7 +123,7 @@ macro_rules! compare_node_kinds_check { $input_str:expr, $result:expr ) => { - if let Some(error) = $crate::mdschema::validator::node_walker::helpers::compare_node_kinds::compare_node_kinds( + if let Some(error) = $crate::mdschema::validation::walkers::helpers::compare_node_kinds::compare_node_kinds( &$schema_cursor, &$input_cursor, $schema_str, @@ -137,7 +137,7 @@ macro_rules! 
compare_node_kinds_check { #[cfg(test)] mod tests { - use crate::mdschema::validator::ts_utils::parse_markdown; + use crate::mdschema::validation::ts_utils::parse_markdown; use super::*; diff --git a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs b/src/mdschema/validation/walkers/helpers/compare_text_contents.rs similarity index 96% rename from src/mdschema/validator/node_walker/helpers/compare_text_contents.rs rename to src/mdschema/validation/walkers/helpers/compare_text_contents.rs index 7928b3b..4a8c9aa 100644 --- a/src/mdschema/validator/node_walker/helpers/compare_text_contents.rs +++ b/src/mdschema/validation/walkers/helpers/compare_text_contents.rs @@ -1,13 +1,13 @@ use serde_json::json; use tree_sitter::TreeCursor; -use crate::mdschema::validator::errors::{ +use crate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaError, SchemaViolationError, ValidationError, }; -use crate::mdschema::validator::matcher::matcher::MatcherError; -use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::helpers::curly_matchers::extract_matcher_from_curly_delineated_text; -use crate::mdschema::validator::ts_utils::get_node_text; +use crate::mdschema::validation::matchers::matcher::MatcherError; +use crate::mdschema::validation::walkers::ValidationResult; +use crate::mdschema::validation::walkers::helpers::curly_matchers::extract_matcher_from_curly_delineated_text; +use crate::mdschema::validation::ts_utils::get_node_text; /// Compare text contents between schema and input nodes. /// Handles both literal text and curly-delimited matchers. @@ -167,7 +167,7 @@ macro_rules! 
compare_text_contents_check { #[cfg(test)] mod tests { - use crate::mdschema::validator::ts_utils::new_markdown_parser; + use crate::mdschema::validation::ts_utils::new_markdown_parser; use super::*; diff --git a/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs b/src/mdschema/validation/walkers/helpers/count_non_literal_matchers_in_children.rs similarity index 95% rename from src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs rename to src/mdschema/validation/walkers/helpers/count_non_literal_matchers_in_children.rs index aa98a6c..3490cbe 100644 --- a/src/mdschema/validator/node_walker/helpers/count_non_literal_matchers_in_children.rs +++ b/src/mdschema/validation/walkers/helpers/count_non_literal_matchers_in_children.rs @@ -1,8 +1,8 @@ use tree_sitter::TreeCursor; -use crate::mdschema::validator::{ +use crate::mdschema::validation::{ errors::{SchemaError, ValidationError}, - matcher::{ + matchers::{ matcher::{Matcher, MatcherError}, matcher_extras::get_all_extras, }, @@ -30,7 +30,7 @@ use crate::mdschema::validator::{ /// /// Contains a document with one child, which is just a normal paragraph with a /// matcher in it. - +/// /// Count the number of matchers, starting at some cursor pointing to a textual /// container, and iterating through all of its children. 
/// @@ -97,10 +97,10 @@ pub fn count_non_literal_matchers_in_children( #[cfg(test)] mod tests { - use crate::mdschema::validator::{ + use crate::mdschema::validation::{ errors::{SchemaError, ValidationError}, - matcher::matcher::MatcherError, - node_walker::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children, + matchers::matcher::MatcherError, + walkers::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children, ts_utils::parse_markdown, }; diff --git a/src/mdschema/validator/node_walker/helpers/curly_matchers.rs b/src/mdschema/validation/walkers/helpers/curly_matchers.rs similarity index 96% rename from src/mdschema/validator/node_walker/helpers/curly_matchers.rs rename to src/mdschema/validation/walkers/helpers/curly_matchers.rs index 4358abb..71061ce 100644 --- a/src/mdschema/validator/node_walker/helpers/curly_matchers.rs +++ b/src/mdschema/validation/walkers/helpers/curly_matchers.rs @@ -1,7 +1,7 @@ use regex::Regex; use std::sync::LazyLock; -use crate::mdschema::validator::matcher::matcher::{Matcher, MatcherError}; +use crate::mdschema::validation::matchers::matcher::{Matcher, MatcherError}; static CURLY_MATCHER: LazyLock = LazyLock::new(|| Regex::new(r"^\{(?P.+?)\}(?P.*)?$").unwrap()); diff --git a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs b/src/mdschema/validation/walkers/helpers/expected_input_nodes.rs similarity index 94% rename from src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs rename to src/mdschema/validation/walkers/helpers/expected_input_nodes.rs index 669ea36..feb69a1 100644 --- a/src/mdschema/validator/node_walker/helpers/expected_input_nodes.rs +++ b/src/mdschema/validation/walkers/helpers/expected_input_nodes.rs @@ -1,9 +1,9 @@ use tree_sitter::TreeCursor; use crate::invariant_violation; -use crate::mdschema::validator::{ +use crate::mdschema::validation::{ errors::{SchemaError, ValidationError}, - matcher::{ + matchers::{ 
matcher::{Matcher, MatcherError}, matcher_extras::{get_after_extras, get_all_extras}, }, @@ -64,25 +64,16 @@ pub fn expected_input_nodes( Some(at_coalescing) => { let has_extra_text = has_extra_text(&schema_cursor, schema_str)?; - if at_coalescing { - if !has_extra_text { - 1 - } else if next_is_non_text { - 1 - } else { - 0 - } - } else if has_extra_text { - 1 - } else { - 0 - } + ((at_coalescing && (!has_extra_text || next_is_non_text)) + || (!at_coalescing && has_extra_text)) + .into() } - None => match next_at_coalescing_matcher(&schema_cursor, schema_str)? { - Some(next_is_coalescing) if at_text_node && !next_is_coalescing => 1, - Some(_) => 0, - None => 0, - }, + None if at_text_node => matches!( + next_at_coalescing_matcher(&schema_cursor, schema_str)?, + Some(false) + ) + .into(), + None => 0, }; if !schema_cursor.goto_next_sibling() { @@ -137,7 +128,8 @@ fn has_extra_text(schema_cursor: &TreeCursor, schema_str: &str) -> Result { let had_next_matcher = move_cursor_to_next_matcher(&mut lookahead_cursor, schema_str)?; - let has_text_after_matcher = !(text_after_matcher(schema_cursor, schema_str)?).is_empty(); + let has_text_after_matcher = + !(text_after_matcher(schema_cursor, schema_str)?).is_empty(); if has_text_after_matcher { return Ok(true); @@ -145,7 +137,8 @@ fn has_extra_text(schema_cursor: &TreeCursor, schema_str: &str) -> Result NodeWalker<'a, S> { #[cfg(test)] mod tests { - use crate::mdschema::validator::node_walker::utils::validate_str; + use crate::mdschema::validation::walkers::utils::validate_str; use serde_json::json; #[test] diff --git a/src/mdschema/validator/node_walker/utils.rs b/src/mdschema/validation/walkers/utils.rs similarity index 88% rename from src/mdschema/validator/node_walker/utils.rs rename to src/mdschema/validation/walkers/utils.rs index 7f356f8..d05efcd 100644 --- a/src/mdschema/validator/node_walker/utils.rs +++ b/src/mdschema/validation/walkers/utils.rs @@ -3,14 +3,14 @@ use tree_sitter::TreeCursor; #[cfg(test)] use 
serde_json::Value; -use crate::mdschema::validator::ts_utils::walk_to_root; +use crate::mdschema::validation::ts_utils::walk_to_root; #[cfg(test)] -use crate::mdschema::validator::{errors::ValidationError, validator::Validator}; +use crate::mdschema::validation::{errors::ValidationError, validator::Validator}; use mdvalidate_utils::PrettyPrint; #[cfg(test)] pub fn validate_str(schema: &str, input: &str) -> (Value, Vec, Validator) { - use crate::mdschema::validator::validator::ValidatorState; + use crate::mdschema::validation::validator::ValidatorState; let mut validator = Validator::new_complete(schema, input).unwrap(); validator.validate(); diff --git a/src/mdschema/validator/node_walker/validation_result.rs b/src/mdschema/validation/walkers/validation_result.rs similarity index 95% rename from src/mdschema/validator/node_walker/validation_result.rs rename to src/mdschema/validation/walkers/validation_result.rs index 7236e83..b80b103 100644 --- a/src/mdschema/validator/node_walker/validation_result.rs +++ b/src/mdschema/validation/walkers/validation_result.rs @@ -1,9 +1,9 @@ use serde_json::{Value, json}; use tree_sitter::TreeCursor; -use crate::mdschema::validator::errors::ValidationError; -use crate::mdschema::validator::node_pos_pair::NodePosPair; -use crate::mdschema::validator::utils::join_values; +use crate::mdschema::validation::errors::ValidationError; +use crate::mdschema::validation::node_pos_pair::NodePosPair; +use crate::mdschema::validation::utils::join_values; /// Validation data containing errors and matched values, without position tracking #[derive(Clone, Debug, PartialEq)] @@ -196,7 +196,7 @@ mod tests { result.join_other_result(&ValidationResult::from_descendant_indexes(1, 1)); result.add_error(ValidationError::ValidatorCreationFailed); - assert_eq!(result.farthest_reached_pos().to_pos(), (1, 1)); // the farther! + assert_eq!(result.farthest_reached_pos().as_pos(), (1, 1)); // the farther! 
assert_eq!(result.value(), &json!({"id": "value"})); assert_eq!(result.errors().len(), 1); @@ -214,7 +214,7 @@ mod tests { result.set_match("id", json!("value")); result.join_other_result(&other); - assert_eq!(result.farthest_reached_pos().to_pos(), (1, 1)); + assert_eq!(result.farthest_reached_pos().as_pos(), (1, 1)); assert_eq!(result.value(), &json!({"id": "value"})); assert_eq!(result.errors().len(), 0); } diff --git a/src/mdschema/validator/node_walker/validators/code.rs b/src/mdschema/validation/walkers/validators/code.rs similarity index 83% rename from src/mdschema/validator/node_walker/validators/code.rs rename to src/mdschema/validation/walkers/validators/code.rs index ffbf324..d6e27f7 100644 --- a/src/mdschema/validator/node_walker/validators/code.rs +++ b/src/mdschema/validation/walkers/validators/code.rs @@ -6,17 +6,17 @@ use serde_json::json; use crate::invariant_violation; -use crate::mdschema::validator::validator_walker::ValidatorWalker; -use crate::mdschema::validator::{ +use crate::mdschema::validation::ts_utils::CodeblockContents; +use crate::mdschema::validation::validator_walker::ValidatorWalker; +use crate::mdschema::validation::{ errors::{NodeContentMismatchKind, SchemaError, SchemaViolationError, ValidationError}, - node_walker::{ + walkers::{ ValidationResult, helpers::curly_matchers::{ extract_id_from_curly_braces, extract_matcher_from_curly_delineated_text, }, validators::ValidatorImpl, }, - ts_utils::extract_codeblock_contents, }; /// Validate a code block against a schema code block. 
@@ -79,25 +79,24 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { ); } - let input_extracted = match extract_codeblock_contents(&input_cursor, walker.input_str()) { - Ok(value) => value, - Err(error) => { - result.add_error(error); - return result; - } - }; - let schema_extracted = match extract_codeblock_contents(&schema_cursor, walker.schema_str()) { - Ok(value) => value, - Err(error) => { - result.add_error(error); - return result; - } - }; + let input_extracted = + match CodeblockContents::try_from_cursor(&input_cursor, walker.input_str()) { + Ok(value) => value, + Err(error) => { + result.add_error(error); + return result; + } + }; + let schema_extracted = + match CodeblockContents::try_from_cursor(&schema_cursor, walker.schema_str()) { + Ok(value) => value, + Err(error) => { + result.add_error(error); + return result; + } + }; - let ( - Some((schema_lang, (schema_code, schema_code_descendant_index))), - Some((input_lang, (input_code, input_code_descendant_index))), - ) = (&schema_extracted, &input_extracted) + let (Some(schema_contents), Some(input_contents)) = (&schema_extracted, &input_extracted) else { #[cfg(feature = "invariant_violations")] // The only reason the "entire thing" would be wrong is because we're @@ -106,12 +105,17 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { result, &schema_cursor, &input_cursor, - "Failed to extract code block contents from schema or input (schema: {:?}, input: {:?})", - schema_extracted, - input_extracted + "Failed to extract code block contents from schema or input" ); + #[allow(unreachable_code)] + return result; }; + let (schema_lang, (schema_code, schema_code_descendant_index)) = + (&schema_contents.lang, &schema_contents.code); + let (input_lang, (input_code, input_code_descendant_index)) = + (&input_contents.lang, &input_contents.code); + // Check if schema language has a matcher pattern (like {lang:/\w*/}) match 
schema_lang.as_ref().and_then(|(lang, descendant_index)| { extract_matcher_from_curly_delineated_text(lang) @@ -159,17 +163,18 @@ fn validate_code_vs_code_impl(walker: &ValidatorWalker) -> ValidationResult { Some((input_lang_str, input_lang_descendant_index)), Some((schema_lang_str, schema_lang_descendant_index)), ) = (schema_lang, input_lang) - && input_lang_str != schema_lang_str { - result.add_error(ValidationError::SchemaViolation( - SchemaViolationError::NodeContentMismatch { - schema_index: *schema_lang_descendant_index, - input_index: *input_lang_descendant_index, - expected: schema_lang_str.clone(), - actual: input_lang_str.clone(), - kind: NodeContentMismatchKind::Literal, - }, - )); - } + && input_lang_str != schema_lang_str + { + result.add_error(ValidationError::SchemaViolation( + SchemaViolationError::NodeContentMismatch { + schema_index: *schema_lang_descendant_index, + input_index: *input_lang_descendant_index, + expected: schema_lang_str.clone(), + actual: input_lang_str.clone(), + kind: NodeContentMismatchKind::Literal, + }, + )); + } } } @@ -207,7 +212,7 @@ mod tests { use serde_json::json; use super::super::test_utils::ValidatorTester; - use crate::mdschema::validator::ts_types::*; + use crate::mdschema::validation::ts_types::*; use super::*; diff --git a/src/mdschema/validator/node_walker/validators/containers.rs b/src/mdschema/validation/walkers/validators/containers.rs similarity index 97% rename from src/mdschema/validator/node_walker/validators/containers.rs rename to src/mdschema/validation/walkers/validators/containers.rs index 39b7c65..834506b 100644 --- a/src/mdschema/validator/node_walker/validators/containers.rs +++ b/src/mdschema/validation/walkers/validators/containers.rs @@ -7,15 +7,15 @@ //! - `RepeatedMatcherParagraphVsParagraphValidator`: handles paragraphs that //! contain a single repeating matcher, collecting matches across repeated //! paragraphs before delegating to nested validation. 
-use crate::mdschema::validator::matcher::matcher::MatcherKind; -use crate::mdschema::validator::node_walker::helpers::check_repeating_matchers::check_repeating_matchers; -use crate::mdschema::validator::node_walker::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children; -use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; -use crate::mdschema::validator::validator_walker::ValidatorWalker; -use crate::mdschema::validator::{ +use crate::mdschema::validation::matchers::matcher::MatcherKind; +use crate::mdschema::validation::walkers::helpers::check_repeating_matchers::check_repeating_matchers; +use crate::mdschema::validation::walkers::helpers::count_non_literal_matchers_in_children::count_non_literal_matchers_in_children; +use crate::mdschema::validation::ts_utils::{get_node_text, waiting_at_end}; +use crate::mdschema::validation::validator_walker::ValidatorWalker; +use crate::mdschema::validation::{ errors::*, - matcher::matcher::Matcher, - node_walker::{ + matchers::matcher::Matcher, + walkers::{ ValidationResult, helpers::expected_input_nodes::expected_input_nodes, validators::{ @@ -430,10 +430,10 @@ mod tests { use serde_json::json; use super::{ContainerVsContainerValidator, is_repeated_matcher_paragraph}; - use crate::mdschema::validator::{ + use crate::mdschema::validation::{ errors::{SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, - node_walker::validators::{ + walkers::validators::{ containers::RepeatedMatcherParagraphVsParagraphValidator, test_utils::ValidatorTester, }, ts_types::*, diff --git a/src/mdschema/validator/node_walker/validators/headings.rs b/src/mdschema/validation/walkers/validators/headings.rs similarity index 95% rename from src/mdschema/validator/node_walker/validators/headings.rs rename to src/mdschema/validation/walkers/validators/headings.rs index f710ea6..c1d62ed 100644 --- a/src/mdschema/validator/node_walker/validators/headings.rs +++ 
b/src/mdschema/validation/walkers/validators/headings.rs @@ -7,14 +7,14 @@ use log::trace; use tree_sitter::TreeCursor; use crate::invariant_violation; -use crate::mdschema::validator::errors::ValidationError; -use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::helpers::compare_node_kinds::compare_node_kinds; -use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; -use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; -use crate::mdschema::validator::ts_types::*; -use crate::mdschema::validator::ts_utils::waiting_at_end; -use crate::mdschema::validator::validator_walker::ValidatorWalker; +use crate::mdschema::validation::errors::ValidationError; +use crate::mdschema::validation::walkers::ValidationResult; +use crate::mdschema::validation::walkers::helpers::compare_node_kinds::compare_node_kinds; +use crate::mdschema::validation::walkers::validators::containers::ContainerVsContainerValidator; +use crate::mdschema::validation::walkers::validators::{Validator, ValidatorImpl}; +use crate::mdschema::validation::ts_types::*; +use crate::mdschema::validation::ts_utils::waiting_at_end; +use crate::mdschema::validation::validator_walker::ValidatorWalker; /// Validate two headings. 
/// @@ -140,7 +140,7 @@ fn ensure_at_heading_content(cursor: &mut TreeCursor) -> Result Validator for T { mod test_utils { use tree_sitter::{Node, Tree, TreeCursor}; - use crate::mdschema::validator::{ - node_walker::utils::pretty_print_cursor_pair, ts_utils::parse_markdown, + use crate::mdschema::validation::{ + walkers::utils::pretty_print_cursor_pair, ts_utils::parse_markdown, validator_walker::ValidatorWalker, }; diff --git a/src/mdschema/validator/node_walker/validators/nodes.rs b/src/mdschema/validation/walkers/validators/nodes.rs similarity index 93% rename from src/mdschema/validator/node_walker/validators/nodes.rs rename to src/mdschema/validation/walkers/validators/nodes.rs index 9841f26..734b7df 100644 --- a/src/mdschema/validator/node_walker/validators/nodes.rs +++ b/src/mdschema/validation/walkers/validators/nodes.rs @@ -5,23 +5,23 @@ //! based on node kinds and performs shared structural checks. use log::trace; -use crate::mdschema::validator::errors::{ +use crate::mdschema::validation::errors::{ MalformedStructureKind, SchemaViolationError, ValidationError, }; -use crate::mdschema::validator::node_pos_pair::NodePosPair; -use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::validators::code::CodeVsCodeValidator; -use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; -use crate::mdschema::validator::node_walker::validators::headings::HeadingVsHeadingValidator; -use crate::mdschema::validator::node_walker::validators::links::LinkVsLinkValidator; -use crate::mdschema::validator::node_walker::validators::lists::ListVsListValidator; -use crate::mdschema::validator::node_walker::validators::quotes::QuoteVsQuoteValidator; -use crate::mdschema::validator::node_walker::validators::tables::TableVsTableValidator; -use crate::mdschema::validator::node_walker::validators::textual::TextualVsTextualValidator; -use 
crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; -use crate::mdschema::validator::ts_types::*; -use crate::mdschema::validator::ts_utils::waiting_at_end; -use crate::mdschema::validator::validator_walker::ValidatorWalker; +use crate::mdschema::validation::node_pos_pair::NodePosPair; +use crate::mdschema::validation::walkers::ValidationResult; +use crate::mdschema::validation::walkers::validators::code::CodeVsCodeValidator; +use crate::mdschema::validation::walkers::validators::containers::ContainerVsContainerValidator; +use crate::mdschema::validation::walkers::validators::headings::HeadingVsHeadingValidator; +use crate::mdschema::validation::walkers::validators::links::LinkVsLinkValidator; +use crate::mdschema::validation::walkers::validators::lists::ListVsListValidator; +use crate::mdschema::validation::walkers::validators::quotes::QuoteVsQuoteValidator; +use crate::mdschema::validation::walkers::validators::tables::TableVsTableValidator; +use crate::mdschema::validation::walkers::validators::textual::TextualVsTextualValidator; +use crate::mdschema::validation::walkers::validators::{Validator, ValidatorImpl}; +use crate::mdschema::validation::ts_types::*; +use crate::mdschema::validation::ts_utils::waiting_at_end; +use crate::mdschema::validation::validator_walker::ValidatorWalker; use crate::{compare_node_kinds_check, invariant_violation}; /// Validate two arbitrary nodes against each other. 
@@ -258,7 +258,7 @@ mod tests { use super::super::test_utils::ValidatorTester; use super::NodeVsNodeValidator; - use crate::mdschema::validator::{ + use crate::mdschema::validation::{ errors::{MalformedStructureKind, SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, ts_types::both_are_paragraphs, diff --git a/src/mdschema/validator/node_walker/validators/quotes.rs b/src/mdschema/validation/walkers/validators/quotes.rs similarity index 91% rename from src/mdschema/validator/node_walker/validators/quotes.rs rename to src/mdschema/validation/walkers/validators/quotes.rs index b3b66b8..f3ebce1 100644 --- a/src/mdschema/validator/node_walker/validators/quotes.rs +++ b/src/mdschema/validation/walkers/validators/quotes.rs @@ -3,10 +3,10 @@ //! Types: //! - `QuoteVsQuoteValidator`: verifies quote node kinds and delegates content //! validation to textual containers. -use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; -use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; -use crate::mdschema::validator::validator_walker::ValidatorWalker; +use crate::mdschema::validation::walkers::ValidationResult; +use crate::mdschema::validation::walkers::validators::containers::ContainerVsContainerValidator; +use crate::mdschema::validation::walkers::validators::{Validator, ValidatorImpl}; +use crate::mdschema::validation::validator_walker::ValidatorWalker; use crate::{compare_node_kinds_check, invariant_violation}; /// Validator for block quote nodes. 
@@ -70,7 +70,7 @@ impl ValidatorImpl for QuoteVsQuoteValidator { mod tests { use super::super::test_utils::ValidatorTester; use super::QuoteVsQuoteValidator; - use crate::mdschema::validator::{ + use crate::mdschema::validation::{ errors::{NodeContentMismatchKind, SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, }; diff --git a/src/mdschema/validator/node_walker/validators/tables.rs b/src/mdschema/validation/walkers/validators/tables.rs similarity index 98% rename from src/mdschema/validator/node_walker/validators/tables.rs rename to src/mdschema/validation/walkers/validators/tables.rs index f95a5a9..7b514f8 100644 --- a/src/mdschema/validator/node_walker/validators/tables.rs +++ b/src/mdschema/validation/walkers/validators/tables.rs @@ -6,18 +6,18 @@ //! - `RepeatedRowVsRowValidator`: processes schema rows followed by matcher //! repeaters, keeping the schema stationary while validating multiple input //! rows against a repeating matcher row. -use crate::mdschema::validator::errors::{ +use crate::mdschema::validation::errors::{ MalformedStructureKind, NodeContentMismatchKind, SchemaViolationError, ValidationError, }; -use crate::mdschema::validator::matcher::matcher::Matcher; -use crate::mdschema::validator::matcher::matcher_extras::MatcherExtras; -use crate::mdschema::validator::node_pos_pair::NodePosPair; -use crate::mdschema::validator::node_walker::ValidationResult; -use crate::mdschema::validator::node_walker::validators::containers::ContainerVsContainerValidator; -use crate::mdschema::validator::node_walker::validators::{Validator, ValidatorImpl}; -use crate::mdschema::validator::ts_types::*; -use crate::mdschema::validator::ts_utils::{get_node_text, waiting_at_end}; -use crate::mdschema::validator::validator_walker::ValidatorWalker; +use crate::mdschema::validation::matchers::matcher::Matcher; +use crate::mdschema::validation::matchers::matcher_extras::MatcherExtras; +use crate::mdschema::validation::node_pos_pair::NodePosPair; +use 
crate::mdschema::validation::walkers::ValidationResult; +use crate::mdschema::validation::walkers::validators::containers::ContainerVsContainerValidator; +use crate::mdschema::validation::walkers::validators::{Validator, ValidatorImpl}; +use crate::mdschema::validation::ts_types::*; +use crate::mdschema::validation::ts_utils::{get_node_text, waiting_at_end}; +use crate::mdschema::validation::validator_walker::ValidatorWalker; use crate::invariant_violation; use log::trace; use tree_sitter::TreeCursor; @@ -549,7 +549,7 @@ fn try_get_repeated_row_bounds( mod tests { use super::super::test_utils::ValidatorTester; use super::*; - use crate::mdschema::validator::{ + use crate::mdschema::validation::{ errors::{NodeContentMismatchKind, SchemaViolationError, ValidationError}, node_pos_pair::NodePosPair, ts_utils::parse_markdown, diff --git a/src/mdschema/validator/node_walker/validators/textual.rs b/src/mdschema/validation/walkers/validators/textual.rs similarity index 90% rename from src/mdschema/validator/node_walker/validators/textual.rs rename to src/mdschema/validation/walkers/validators/textual.rs index 7019492..719b23c 100644 --- a/src/mdschema/validator/node_walker/validators/textual.rs +++ b/src/mdschema/validation/walkers/validators/textual.rs @@ -8,12 +8,12 @@ use tree_sitter::TreeCursor; use crate::compare_node_kinds_check; use crate::invariant_violation; -use crate::mdschema::validator::node_walker::helpers::compare_text_contents::compare_text_contents; -use crate::mdschema::validator::node_walker::validators::ValidatorImpl; -use crate::mdschema::validator::node_walker::validators::matchers::MatcherVsTextValidator; -use crate::mdschema::validator::validator_walker::ValidatorWalker; -use crate::mdschema::validator::{ - node_walker::{ValidationResult, validators::Validator}, +use crate::mdschema::validation::walkers::helpers::compare_text_contents::compare_text_contents; +use crate::mdschema::validation::walkers::validators::ValidatorImpl; +use 
crate::mdschema::validation::walkers::validators::matchers::MatcherVsTextValidator; +use crate::mdschema::validation::validator_walker::ValidatorWalker; +use crate::mdschema::validation::{ + walkers::{ValidationResult, validators::Validator}, ts_types::*, ts_utils::{get_next_node, waiting_at_end}, }; @@ -116,7 +116,7 @@ mod tests { use super::super::test_utils::ValidatorTester; use super::TextualVsTextualValidator; - use crate::mdschema::validator::{node_pos_pair::NodePosPair, ts_types::*}; + use crate::mdschema::validation::{node_pos_pair::NodePosPair, ts_types::*}; #[test] fn test_validate_textual_vs_textual_with_literal_matcher() { diff --git a/tests/code.rs b/tests/code.rs index 0c537ad..4c733db 100644 --- a/tests/code.rs +++ b/tests/code.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaViolationError, ValidationError, }; diff --git a/tests/headings.rs b/tests/headings.rs index a08bc2b..128daa4 100644 --- a/tests/headings.rs +++ b/tests/headings.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{SchemaViolationError, ValidationError}; +use mdvalidate::mdschema::validation::errors::{SchemaViolationError, ValidationError}; test_case!(heading_literal, r#"# Hi"#, r#"# Hi"#, json!({}), vec![]); diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index acf42ae..9575479 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -1,5 +1,5 @@ -use mdvalidate::mdschema::validator::errors::ValidationError; -use mdvalidate::mdschema::validator::validator::{Validator, ValidatorState}; +use mdvalidate::mdschema::validation::errors::ValidationError; +use mdvalidate::mdschema::validation::validator::{Validator, ValidatorState}; use serde_json::Value; pub fn run_test_case(schema: &str, input: &str) -> (Vec, Value) { diff --git a/tests/links.rs b/tests/links.rs 
index 90daeb2..3e7d6cf 100644 --- a/tests/links.rs +++ b/tests/links.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaViolationError, ValidationError, }; diff --git a/tests/lists.rs b/tests/lists.rs index 74c6313..6b4ae03 100644 --- a/tests/lists.rs +++ b/tests/lists.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{SchemaViolationError, ValidationError}; +use mdvalidate::mdschema::validation::errors::{SchemaViolationError, ValidationError}; test_case!( ordered_list_literal, diff --git a/tests/matchers.rs b/tests/matchers.rs index 02b0794..d714297 100644 --- a/tests/matchers.rs +++ b/tests/matchers.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaViolationError, ValidationError, }; diff --git a/tests/misc.rs b/tests/misc.rs index 125454d..67e8187 100644 --- a/tests/misc.rs +++ b/tests/misc.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ MalformedStructureKind, SchemaViolationError, ValidationError, }; diff --git a/tests/quotes.rs b/tests/quotes.rs index 1c5cb72..38c6fe1 100644 --- a/tests/quotes.rs +++ b/tests/quotes.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaViolationError, ValidationError, }; diff --git a/tests/rulers.rs b/tests/rulers.rs index be1e826..cc6d0b3 100644 --- a/tests/rulers.rs +++ b/tests/rulers.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use 
mdvalidate::mdschema::validation::errors::{ MalformedStructureKind, SchemaViolationError, ValidationError, }; diff --git a/tests/tables.rs b/tests/tables.rs index b927f16..a7e88f1 100644 --- a/tests/tables.rs +++ b/tests/tables.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaViolationError, ValidationError, }; diff --git a/tests/textual.rs b/tests/textual.rs index d47665b..75b1c20 100644 --- a/tests/textual.rs +++ b/tests/textual.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaViolationError, ValidationError, }; diff --git a/tests/textual_container.rs b/tests/textual_container.rs index 2236dbb..47a0300 100644 --- a/tests/textual_container.rs +++ b/tests/textual_container.rs @@ -3,7 +3,7 @@ use serde_json::json; #[macro_use] mod helpers; -use mdvalidate::mdschema::validator::errors::{ +use mdvalidate::mdschema::validation::errors::{ NodeContentMismatchKind, SchemaViolationError, ValidationError, }; From 35738582a57b2a70033281070dd75a9fc44950bc Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 19:33:25 -0500 Subject: [PATCH 29/33] fix doc tests --- src/mdschema/validation/matchers/matcher_extras.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdschema/validation/matchers/matcher_extras.rs b/src/mdschema/validation/matchers/matcher_extras.rs index 9d0bc1e..4a8f4cc 100644 --- a/src/mdschema/validation/matchers/matcher_extras.rs +++ b/src/mdschema/validation/matchers/matcher_extras.rs @@ -98,7 +98,7 @@ impl std::fmt::Display for MatcherExtrasError { /// # Examples /// /// ``` -/// use mdvalidate::mdschema::validation::matcher::matcher_extras::MatcherExtras; +/// use 
mdvalidate::mdschema::validation::matchers::matcher_extras::MatcherExtras; /// /// // Matcher with repeat limits: `name:/\w+/`{2,5} /// let extras = MatcherExtras::try_new(Some("{2,5}")).unwrap(); From ecac0e468d41e7d45f0db795a7fc7acc9332fbf6 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 19:33:42 -0500 Subject: [PATCH 30/33] only on push to main --- .github/workflows/release.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c8409f5..35d919f 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,11 +1,14 @@ name: Release on: push: + branches: + - main tags: - "v*" workflow_dispatch: permissions: contents: write + jobs: build: strategy: From ef5c4df8e6125903bdcdbd10f33daab3a9889b12 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 19:36:33 -0500 Subject: [PATCH 31/33] chore: Release --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d342f16..16ed409 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -646,7 +646,7 @@ dependencies = [ [[package]] name = "mdvalidate" -version = "0.2.3" +version = "0.2.4" dependencies = [ "ariadne", "clap", diff --git a/Cargo.toml b/Cargo.toml index fe69ca5..e0b91d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ path = "src/lib.rs" [package] name = "mdvalidate" -version = "0.2.3" +version = "0.2.4" description = "Markdown schema validation engine" license = "MIT" documentation = "https://github.com/404wolf/mdvalidate" From 260c14b9788bda13c45a47b9ce857f8b59370747 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 19:37:43 -0500 Subject: [PATCH 32/33] update mdvalidate utils --- Cargo.lock | 2 +- Cargo.toml | 2 +- utils/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 16ed409..1bfed47 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -673,7 +673,7 @@ dependencies = [ [[package]] name = "mdvalidate-utils" -version = "0.0.1" +version = "0.0.2" dependencies = [ "clap", "ptree", diff --git a/Cargo.toml b/Cargo.toml index e0b91d6..9d727b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ tree-sitter-markdown = {package = "tree-sitter-markdown-fork", version = "0.7.1" ptree = "0.5.2" paste = "1.0.15" tabled = "0.20.0" -mdvalidate-utils = {version = "0.0.1", path = "utils"} +mdvalidate-utils = {version = "0.0.2", path = "utils"} thiserror = "2.0.17" derive_builder = "0.20.2" diff --git a/utils/Cargo.toml b/utils/Cargo.toml index 15d5379..6a662d4 100644 --- a/utils/Cargo.toml +++ b/utils/Cargo.toml @@ -9,7 +9,7 @@ path = "src/lib.rs" [package] name = "mdvalidate-utils" -version = "0.0.1" +version = "0.0.2" edition = "2024" description = "Utility functions for mdvalidate" license = "MIT" From 8ff1e3069af81438da99a339e140ea427d049ed7 Mon Sep 17 00:00:00 2001 From: Wolf Mermelstein Date: Sat, 10 Jan 2026 19:38:20 -0500 Subject: [PATCH 33/33] chore: Release --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1bfed47..8e0c6ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -646,7 +646,7 @@ dependencies = [ [[package]] name = "mdvalidate" -version = "0.2.4" +version = "0.2.5" dependencies = [ "ariadne", "clap", diff --git a/Cargo.toml b/Cargo.toml index 9d727b7..a07798c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ path = "src/lib.rs" [package] name = "mdvalidate" -version = "0.2.4" +version = "0.2.5" description = "Markdown schema validation engine" license = "MIT" documentation = "https://github.com/404wolf/mdvalidate"