diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index c8409f5..35d919f 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,11 +1,14 @@
name: Release
on:
push:
+ branches:
+ - main
tags:
- "v*"
workflow_dispatch:
permissions:
contents: write
+
jobs:
build:
strategy:
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 3a32f2b..52f7c02 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -1,35 +1,28 @@
name: Tests
-
on:
push:
-
jobs:
test:
runs-on: ubuntu-latest
-
permissions:
contents: read
actions: write
-
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
-
- name: Install Nix
uses: cachix/install-nix-action@v31
with:
extra_nix_config: |
experimental-features = nix-command flakes
-
- name: Restore Nix store
id: nix-cache
uses: nix-community/cache-nix-action/restore@v7
with:
primary-key: nix-${{ runner.os }}-${{ hashFiles('**/flake.lock') }}
restore-prefixes-first-match: nix-${{ runner.os }}-
-
- name: Restore Cargo cache
uses: actions/cache/restore@v4
id: cargo-cache
@@ -43,13 +36,12 @@ jobs:
key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
cargo-${{ runner.os }}-
-
- name: Build project
run: nix develop --command cargo build
-
- name: Run tests
run: nix develop --command cargo test
-
+ - name: Run clippy
+ run: nix develop --command cargo clippy -- -D warnings
- name: Save Cargo cache
if: always()
uses: actions/cache/save@v4
@@ -61,7 +53,6 @@ jobs:
~/.cargo/git/db/
target/
key: ${{ steps.cargo-cache.outputs.cache-primary-key }}
-
- name: Save Nix store
uses: nix-community/cache-nix-action/save@v7
if: always()
@@ -72,4 +63,4 @@ jobs:
purge-prefixes: nix-${{ runner.os }}-
purge-created: 0
purge-last-accessed: P7D
- purge-primary-key: never
\ No newline at end of file
+ purge-primary-key: never
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..a36bc88
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,25 @@
+## ts_types imports
+- Always import `ts_types` via wildcard (`use crate::mdschema::validator::ts_types::*;`) so we do not list individual members.
+
+## Test imports
+- Prefer `super::...` imports inside `#[cfg(test)]` modules (e.g., `super::test_utils::ValidatorTester` or `super::TextualVsTextualValidator`) so the tests stay concise and structured.
+- Keep using wildcard `ts_types::*` in tests as well.
+
+## Documentation
+- When a doc block lists both `schema_str` and `input_str`, use the exact wording:
+ - `schema_str`: The full schema document.
+ - `input_str`: The full input document (so far).
+- Every `///` doc line that mentions `got_eof` must read verbatim `/// * \`got_eof\`: Whether we have received the full input document.`
+
+## Node-walker validator docs
+- Every file under `src/mdschema/validator/node_walker/validators` should start with a module doc comment and list each validator type defined in that file.
+
+## Walker usage
+- Never add aliases such as `let schema_str = walker.schema_str()` or `let input_str = walker.input_str()`; call the walker methods directly.
+
+## Contributing-from-CONTRIBUTING.md
+- When we talk about a data structure that stores references to schema and input, keep the schema entry first.
+- Prefer `get_node_text` from `ts_utils` over calling `utf8_text` directly.
+- In tests, keep assertion order consistent: position assertions first, followed by errors, then values.
+- Avoid `ValidationResult::destruct`; use accessors like `result.errors()`, `result.value()`, or `result.farthest_reached_pos()`.
+- When debugging tests, call `test_logging!();` (from `utils.rs`) at the top of the suite to hydrate logs and trace output.
diff --git a/Cargo.lock b/Cargo.lock
index fde13c8..8e0c6ab 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -144,9 +144,9 @@ checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
[[package]]
name = "cc"
-version = "1.2.49"
+version = "1.2.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215"
+checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3"
dependencies = [
"find-msvc-tools",
"shlex",
@@ -160,9 +160,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "clap"
-version = "4.5.53"
+version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
+checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394"
dependencies = [
"clap_builder",
"clap_derive",
@@ -170,9 +170,9 @@ dependencies = [
[[package]]
name = "clap_builder"
-version = "4.5.53"
+version = "4.5.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
+checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00"
dependencies = [
"anstream",
"anstyle",
@@ -286,6 +286,72 @@ dependencies = [
"typenum",
]
+[[package]]
+name = "darling"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
+dependencies = [
+ "darling_core",
+ "darling_macro",
+]
+
+[[package]]
+name = "darling_core"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim",
+ "syn",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.20.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
+dependencies = [
+ "darling_core",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "derive_builder"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+dependencies = [
+ "derive_builder_macro",
+]
+
+[[package]]
+name = "derive_builder_core"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
+dependencies = [
+ "derive_builder_core",
+ "syn",
+]
+
[[package]]
name = "digest"
version = "0.10.7"
@@ -381,9 +447,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "find-msvc-tools"
-version = "0.1.5"
+version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
+checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41"
[[package]]
name = "fixedbitset"
@@ -473,11 +539,17 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
+[[package]]
+name = "ident_case"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
+
[[package]]
name = "indexmap"
-version = "2.12.1"
+version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
@@ -502,9 +574,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "itoa"
-version = "1.0.15"
+version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "json5"
@@ -531,9 +603,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
-version = "0.2.178"
+version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
+checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
[[package]]
name = "libredox"
@@ -574,11 +646,12 @@ dependencies = [
[[package]]
name = "mdvalidate"
-version = "0.2.3"
+version = "0.2.5"
dependencies = [
"ariadne",
"clap",
"colored",
+ "derive_builder",
"env_logger",
"envy",
"line-col",
@@ -600,7 +673,7 @@ dependencies = [
[[package]]
name = "mdvalidate-utils"
-version = "0.0.1"
+version = "0.0.2"
dependencies = [
"clap",
"ptree",
@@ -710,9 +783,9 @@ checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
name = "pest"
-version = "2.8.4"
+version = "2.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbcfd20a6d4eeba40179f05735784ad32bdaef05ce8e8af05f180d45bb3e7e22"
+checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7"
dependencies = [
"memchr",
"ucd-trie",
@@ -720,9 +793,9 @@ dependencies = [
[[package]]
name = "pest_derive"
-version = "2.8.4"
+version = "2.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51f72981ade67b1ca6adc26ec221be9f463f2b5839c7508998daa17c23d94d7f"
+checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed"
dependencies = [
"pest",
"pest_generator",
@@ -730,9 +803,9 @@ dependencies = [
[[package]]
name = "pest_generator"
-version = "2.8.4"
+version = "2.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dee9efd8cdb50d719a80088b76f81aec7c41ed6d522ee750178f83883d271625"
+checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5"
dependencies = [
"pest",
"pest_meta",
@@ -743,9 +816,9 @@ dependencies = [
[[package]]
name = "pest_meta"
-version = "2.8.4"
+version = "2.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf1d70880e76bdc13ba52eafa6239ce793d85c8e43896507e43dd8984ff05b82"
+checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365"
dependencies = [
"pest",
"sha2",
@@ -791,9 +864,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
-version = "1.0.103"
+version = "1.0.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7"
dependencies = [
"unicode-ident",
]
@@ -815,9 +888,9 @@ dependencies = [
[[package]]
name = "quote"
-version = "1.0.42"
+version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
dependencies = [
"proc-macro2",
]
@@ -892,9 +965,9 @@ dependencies = [
[[package]]
name = "rustix"
-version = "1.1.2"
+version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
+checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
dependencies = [
"bitflags",
"errno",
@@ -903,12 +976,6 @@ dependencies = [
"windows-sys 0.61.2",
]
-[[package]]
-name = "ryu"
-version = "1.0.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
-
[[package]]
name = "serde"
version = "1.0.228"
@@ -951,16 +1018,16 @@ dependencies = [
[[package]]
name = "serde_json"
-version = "1.0.145"
+version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"indexmap",
"itoa",
"memchr",
- "ryu",
"serde",
"serde_core",
+ "zmij",
]
[[package]]
@@ -1018,9 +1085,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
-version = "2.0.111"
+version = "2.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
+checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
dependencies = [
"proc-macro2",
"quote",
@@ -1053,9 +1120,9 @@ dependencies = [
[[package]]
name = "tempfile"
-version = "3.23.0"
+version = "3.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
+checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c"
dependencies = [
"fastrand",
"getrandom 0.3.4",
@@ -1192,9 +1259,9 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]]
name = "tracing"
-version = "0.1.43"
+version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"pin-project-lite",
"tracing-attributes",
@@ -1214,9 +1281,9 @@ dependencies = [
[[package]]
name = "tracing-core"
-version = "0.1.35"
+version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
dependencies = [
"once_cell",
"valuable",
@@ -1547,20 +1614,26 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "zerocopy"
-version = "0.8.31"
+version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
+checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
-version = "0.8.31"
+version = "0.8.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
+checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
+
+[[package]]
+name = "zmij"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8"
diff --git a/Cargo.toml b/Cargo.toml
index 22daed9..a07798c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,8 +22,9 @@ tree-sitter-markdown = {package = "tree-sitter-markdown-fork", version = "0.7.1"
ptree = "0.5.2"
paste = "1.0.15"
tabled = "0.20.0"
-mdvalidate-utils = {version = "0.0.1", path = "utils"}
+mdvalidate-utils = {version = "0.0.2", path = "utils"}
thiserror = "2.0.17"
+derive_builder = "0.20.2"
[dev-dependencies]
ptree = "0.5.2"
@@ -37,7 +38,7 @@ path = "src/lib.rs"
[package]
name = "mdvalidate"
-version = "0.2.3"
+version = "0.2.5"
description = "Markdown schema validation engine"
license = "MIT"
documentation = "https://github.com/404wolf/mdvalidate"
diff --git a/README.md b/README.md
index 4473343..01b41d9 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ We plan to eventually support converting a Markdown schema into a JSON schema de
You can find the full docs [here](https://404wolf.github.io/mdvalidate/)!
-## Kitchen Sink Example (current + planned)
+## Kitchen Sink Example
Schema:
@@ -28,6 +28,10 @@ Schema:
- `feature:/[A-Za-z][\w -]+/`{2,4}
- `detail:/[a-z][\w -]+/`{,2}
+## Description
+
+`description`{2,3}
+
Inline: `code`! and `bang`!!
```{lang:/\w+/}
@@ -66,6 +70,14 @@ Input:
- fewer allocations
- Safer IO
+## Description
+
+This release focuses on performance improvements and safety enhancements.
+
+Key changes include optimized memory management and stricter type checking.
+
+We've also improved error messages throughout the codebase.
+
Inline: `code` and `bang`!
```rust
@@ -94,6 +106,11 @@ Output:
{
"build": "7A9F3C1",
"checked": "print(\"ok\")",
+ "description": [
+ "This release focuses on performance improvements and safety enhancements.",
+ "Key changes include optimized memory management and stricter type checking.",
+ "We've also improved error messages throughout the codebase."
+ ],
"detail": [
"fewer allocations"
],
diff --git a/docs/src/content/docs/matchers/02-matchers.mdx b/docs/src/content/docs/matchers/02-matchers.mdx
index c09e80b..90f1395 100644
--- a/docs/src/content/docs/matchers/02-matchers.mdx
+++ b/docs/src/content/docs/matchers/02-matchers.mdx
@@ -7,42 +7,89 @@ order: 2
import SchemaAndInput from "../../../components/SchemaAndInput.astro";
import TODO from "../../../components/TODO.astro";
-Matchers allow you to validate dynamic content using regular expressions. A matcher is defined using inline code syntax with a specific format: `` `label:/pattern/` ``.
+Matchers allow you to validate dynamic content in your Markdown documents. There are two types of matchers: **regex matchers** that match patterns using regular expressions, and **all matchers** that match everything as an identity function.
-## Basic Syntax
+# Matcher Types
-The basic matcher format is:
+## Regex Matchers
+
+A regex matcher is defined using inline code syntax with a specific format: `` `label:/pattern/` ``
```
`label:/regex-pattern/`
```
- **label**: An identifier for the matched value (used in validation output)
-- **pattern**: A JavaScript-compatible regular expression
+- **pattern**: A regular expression that matches the content
The pattern is automatically anchored to the start (as if prefixed with `^`), so it matches from the beginning of the available text.
+### Simple Examples
+
-
+
+
+
+
+## All Matchers
+
+All matchers act as an identity function - they **always** match and return exactly what was passed to them. If a matcher has no regex pattern (just a label in backticks), it becomes an all matcher that accepts all available content in the current context.
+
+The syntax is simply `` `label` `` without a regex pattern.
-
+
-## Matchers with Surrounding Text
+
-Matchers can be combined with literal text as prefixes and suffixes:
+All matchers accept any input including special characters, spaces, and other spanning (inline) nodes:
+
+
+
+
+
+
+
+# Matchers with Surrounding Text
+
+Both regex matchers and all matchers can be combined with literal text as prefixes and suffixes:
-This also works for spanning nodes of other types, like italics and subsequent code spans via literal matchers (more on this later).
+## Spanning Multiple Node Types
+
+Matchers can work across different spanning node types, like italics and subsequent code spans:
+# Label Naming Rules
+
+Matcher labels (for both regex matchers and all matchers) must follow these rules:
+
+- Must contain only alphanumeric characters (a-z, A-Z, 0-9), hyphens (`-`), and underscores (`_`)
+- Cannot contain spaces or other special characters
+- Valid examples: `user_name`, `item-count`, `id123`, `MyData`
+- Invalid examples: `user name` (space), `data@field` (special char), `item.count` (period)
+
## Empty Labels
@@ -100,15 +158,15 @@ This also works for spanning nodes of other types, like italics and subsequent c
To match without capturing a value, use an underscore (`_`) as the label:
-## Multiple matchers
+# Multiple Matchers
-Right now, you can only have one matcher per paragraph (collection of spanning elements). So, for example, the following will not work.
+Right now, you can only have one matcher per paragraph (collection of spanning elements). So, for example, the following will not work:
-## Literal Code Blocks
+# Repeating Paragraphs
+
+You can validate multiple paragraph nodes into an array by using a repeated matcher. The repeated matcher syntax is `` {min,max} ``, where `min` and `max` are optional.
+
+**Important:** Repeating paragraph matchers must be **all matchers** (`` `label` ``), not regex matchers. This is because each paragraph can contain arbitrary content and structure.
+
+
+
+
+
+
+
+# Literal Code Blocks
To match inline code blocks literally instead of treating them as matchers, add `!` after the code block:
@@ -138,7 +222,7 @@ To match inline code blocks literally instead of treating them as matchers, add
valid={false}
/>
-### Escaping the Exclamation Mark
+## Escaping the Exclamation Mark
@@ -150,14 +234,14 @@ Use `!!` to match a literal exclamation mark after code:
valid={true}
/>
-## Execution Validation
+# Execution Validation
You can validate content by running an executable. Use the syntax `label:!command`:
-
Use `{min,max}` syntax on row patterns to match multiple rows:
+### Mixing Literal and Repeated Rows
+
+You can combine literal rows with repeated rows in the same table:
+
+
+
+The validator will:
+1. Match the literal "Header" row
+2. Match 1-3 repeating rows with the pattern
+3. Match the literal "Footer" row
+
## Notes
-- Rows return arrays when using repetition matchers
+- Repeated rows return arrays for matched values
- Headers and separator rows are required in both schema and input
- Column count must match between schema and input
+- Repeated row patterns must appear at the end of a row (after all cells)
diff --git a/examples/cli/input.md b/examples/cli/input.md
index 792d600..37c255b 100644
--- a/examples/cli/input.md
+++ b/examples/cli/input.md
@@ -1 +1,12 @@
-#
+Imported with [Obsidian Markdown Importer](https://github.com/404Wolf/obsidian-contact-importer)
+
+---
+
+
+
+## Phones
+
+| Type | Number |
+| :----- | :----------------- |
+| Backup | `c!(917) 246-7875` |
+| Misc | `c!(929) 265-7180` |
diff --git a/examples/cli/schema.md b/examples/cli/schema.md
index 792d600..e4ae77e 100644
--- a/examples/cli/schema.md
+++ b/examples/cli/schema.md
@@ -1 +1,11 @@
-#
+Imported with [Obsidian Markdown Importer](https://github.com/404Wolf/obsidian-contact-importer)
+
+---
+
+
+
+## Phones
+
+| Type | Number |
+| :---------------- | :------------------ |
+| `phone_type:/.*/` | `phone_number:/.*/` |{,}
diff --git a/examples/simple.rs b/examples/simple.rs
index 4f4a14d..59c5378 100644
--- a/examples/simple.rs
+++ b/examples/simple.rs
@@ -1,4 +1,4 @@
-use mdvalidate::{Validator, mdschema::validator::errors::pretty_print_error};
+use mdvalidate::{Validator, mdschema::validation::errors::pretty_print_error};
fn main() {
// Define a simple schema: a heading with a name and a list
diff --git a/src/cmd.rs b/src/cmd.rs
index c453bce..1742cb9 100644
--- a/src/cmd.rs
+++ b/src/cmd.rs
@@ -1,4 +1,4 @@
-use crate::mdschema::validator::{
+use crate::mdschema::validation::{
errors::{
ParserError, PrettyPrintError, ValidationError, debug_print_error, pretty_print_error,
},
@@ -20,6 +20,14 @@ pub enum ProcessingError {
Utf8(std::str::Utf8Error),
}
+#[derive(Debug)]
+pub struct ProcessingResult {
+ pub errors: Vec,
+ pub matches: Value,
+ pub validator: Validator,
+ pub input_str: String,
+}
+
impl From for ProcessingError {
fn from(error: std::io::Error) -> Self {
ProcessingError::Io(error)
@@ -76,50 +84,57 @@ impl From for ProcessingError {
}
}
-pub fn process(
- schema_str: &String,
- input: &mut R,
- fast_fail: bool,
-) -> Result<((Vec, Value), Validator, String), ProcessingError> {
- let buffer_size = get_buffer_size();
+impl ProcessingResult {
+ pub fn process(
+ schema_str: &str,
+ input: &mut R,
+ fast_fail: bool,
+ ) -> Result {
+ let buffer_size = get_buffer_size();
- let mut input_str = String::new();
- let mut buffer = vec![0; buffer_size];
+ let mut input_str = String::new();
+ let mut buffer = vec![0; buffer_size];
- let mut validator = Validator::new_incomplete(schema_str.as_str(), input_str.as_str())
- .ok_or(ValidationError::ValidatorCreationFailed)?;
+ let mut validator = Validator::new_incomplete(schema_str, input_str.as_str())
+ .ok_or(ValidationError::ValidatorCreationFailed)?;
- loop {
- let bytes_read = input.read(&mut buffer)?;
+ loop {
+ let bytes_read = input.read(&mut buffer)?;
- // If we're done reading, mark EOF
- if bytes_read == 0 {
- validator.read_final_input(&input_str)?;
- validator.validate();
+ // If we're done reading, mark EOF
+ if bytes_read == 0 {
+ validator.read_final_input(&input_str)?;
+ validator.validate();
- break;
- }
+ break;
+ }
- let new_text = std::str::from_utf8(&buffer[..bytes_read])?;
- input_str.push_str(new_text);
+ let new_text = std::str::from_utf8(&buffer[..bytes_read])?;
+ input_str.push_str(new_text);
- validator.read_more_input(&input_str)?;
- validator.validate();
+ validator.read_more_input(&input_str)?;
+ validator.validate();
- // Check for fast-fail AFTER validation
- if fast_fail && validator.errors_so_far().count() > 0 {
- break;
+ // Check for fast-fail AFTER validation
+ if fast_fail && validator.errors_so_far().count() > 0 {
+ break;
+ }
}
- }
- let errors: Vec<_> = validator.errors_so_far().cloned().collect();
- let matches = validator.matches_so_far().clone();
+ let errors: Vec<_> = validator.errors_so_far().cloned().collect();
+ let matches = validator.matches_so_far().clone();
- Ok(((errors, matches), validator, input_str))
+ Ok(ProcessingResult {
+ errors,
+ matches,
+ validator,
+ input_str,
+ })
+ }
}
pub fn process_stdio(
- schema_str: &String,
+ schema_str: &str,
input: &mut R,
output: &mut Option<&mut W>,
filename: &str,
@@ -127,7 +142,12 @@ pub fn process_stdio(
quiet: bool,
debug_mode: bool,
) -> Result<((Vec, Value), bool), ProcessingError> {
- let ((errors, matches), validator, _input_str) = process(schema_str, input, fast_fail)?;
+ let ProcessingResult {
+ errors,
+ matches,
+ validator,
+ input_str: _input_str,
+ } = ProcessingResult::process(schema_str, input, fast_fail)?;
let mut errored = false;
if errors.is_empty() {
@@ -175,9 +195,10 @@ mod tests {
mut input: R,
fast_fail: bool,
) -> (Vec, Value) {
- let ((errors, matches), _validator, _) = process(schema, &mut input, fast_fail)
+ let result = ProcessingResult::process(schema, &mut input, fast_fail)
.expect("Validation should complete without errors");
- (errors, matches)
+
+ (result.errors, result.matches)
}
/// A custom reader that only reads a specific number of bytes at a time
@@ -372,7 +393,7 @@ This is a test"#;
"Expected exactly one error but found {:?}",
errors
);
- assert!(matches.is_null() || matches.as_object().map_or(true, |obj| obj.is_empty()));
+ assert!(matches.is_null() || matches.as_object().is_none_or(|obj| obj.is_empty()));
}
}
diff --git a/src/env.rs b/src/env.rs
index e32ed40..eb1dcfc 100644
--- a/src/env.rs
+++ b/src/env.rs
@@ -9,6 +9,7 @@ use serde::Deserialize;
///
/// All fields are optional.
#[derive(Debug, Deserialize, Clone)]
+#[derive(Default)]
pub struct EnvConfig {
/// Enable debug mode for error output.
///
@@ -20,11 +21,6 @@ pub struct EnvConfig {
pub dev_debug: bool,
}
-impl Default for EnvConfig {
- fn default() -> Self {
- Self { dev_debug: false }
- }
-}
impl EnvConfig {
/// Load configuration from environment variables.
diff --git a/src/main.rs b/src/main.rs
index ac6f170..49a58a7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -51,20 +51,18 @@ fn main() -> Result<(), Box> {
let env_config = EnvConfig::load();
let schema_src = PathOrStdio::from(args.schema);
- let schema_src = schema_src.reader().or_else(|e| {
- Err(format!(
+ let schema_src = schema_src.reader().map_err(|e| format!(
"Failed to open schema file '{}': {}",
schema_src.filepath(),
e
- ))
- })?;
+ ))?;
let mut schema_str = String::new();
BufReader::new(schema_src).read_to_string(&mut schema_str)?;
let input = PathOrStdio::from(args.input);
let mut input_reader = input.reader()?;
- let mut output_writer: &mut Option<&mut Box> = match args.output {
+ let output_writer: &mut Option<&mut Box> = match args.output {
Some(ref output_path) => {
let output_pos = PathOrStdio::from(output_path.clone());
&mut Some(&mut output_pos.writer()?)
@@ -75,7 +73,7 @@ fn main() -> Result<(), Box> {
match process_stdio(
&schema_str,
&mut input_reader,
- &mut output_writer,
+ output_writer,
input.filepath(),
args.fast_fail,
args.quiet,
diff --git a/src/mdschema/mod.rs b/src/mdschema/mod.rs
index 5b3449b..0124f37 100644
--- a/src/mdschema/mod.rs
+++ b/src/mdschema/mod.rs
@@ -1,3 +1,3 @@
-pub mod validator;
+pub mod validation;
-pub use validator::validator::Validator;
+pub use validation::validator::Validator;
diff --git a/src/mdschema/validator/errors.rs b/src/mdschema/validation/errors.rs
similarity index 85%
rename from src/mdschema/validator/errors.rs
rename to src/mdschema/validation/errors.rs
index b6a8c7c..e5adff5 100644
--- a/src/mdschema/validator/errors.rs
+++ b/src/mdschema/validation/errors.rs
@@ -1,13 +1,16 @@
-use crate::mdschema::validator::{
- matcher::{matcher::*, matcher_extras::MatcherExtrasError},
+use crate::mdschema::validation::{
+ matchers::{
+ matcher::*,
+ matcher_extras::{MatcherExtras, MatcherExtrasError},
+ },
validator::{Validator, ValidatorState},
};
use ariadne::{Color, Label, Report, ReportKind, Source};
use std::fmt;
use tree_sitter::TreeCursor;
-use crate::mdschema::validator::{
- node_walker::utils::pretty_print_cursor_pair,
+use crate::mdschema::validation::{
+ walkers::utils::pretty_print_cursor_pair,
ts_utils::{find_node_by_index, walk_to_root},
};
@@ -16,7 +19,7 @@ macro_rules! trace_cursors {
($schema_cursor:expr, $input_cursor:expr) => {{
println!(
"{}",
- crate::mdschema::validator::node_walker::utils::pretty_print_cursor_pair(
+ $crate::mdschema::validation::walkers::utils::pretty_print_cursor_pair(
&$schema_cursor,
&$input_cursor,
)
@@ -32,11 +35,11 @@ macro_rules! invariant_violation {
($schema_cursor:expr, $input_cursor:expr, $message:expr $(, $($args:tt)*)?) => {{
#[cfg(feature = "invariant_violations")]
{
- let cursor_info = $crate::mdschema::validator::node_walker::utils::pretty_print_cursor_pair(
+ let cursor_info = $crate::mdschema::validation::walkers::utils::pretty_print_cursor_pair(
$schema_cursor,
$input_cursor,
);
- let error_msg = $crate::mdschema::validator::errors::invariant_violation_message(
+ let error_msg = $crate::mdschema::validation::errors::invariant_violation_message(
Some(($schema_cursor, $input_cursor)),
format!($message $(, $($args)*)?),
module_path!(),
@@ -52,7 +55,7 @@ macro_rules! invariant_violation {
($message:expr $(, $($args:tt)*)?) => {{
#[cfg(feature = "invariant_violations")]
{
- let error_msg = $crate::mdschema::validator::errors::invariant_violation_message(
+ let error_msg = $crate::mdschema::validation::errors::invariant_violation_message(
None,
format!($message $(, $($args)*)?),
module_path!(),
@@ -196,11 +199,6 @@ pub enum SchemaError {
/// A repeating matcher in a textual container
RepeatingMatcherInTextContainer { schema_index: usize },
- /// List node uses a non-repeating matcher.
- ///
- /// List nodes must use matchers with repetition syntax like `{1,}`.
- BadListMatcher { schema_index: usize },
-
/// Matcher has invalid extras syntax.
///
/// For example, `test:/1/`!{1,2} is invalid.
@@ -277,9 +275,7 @@ impl fmt::Display for SchemaError {
SchemaError::RepeatingMatcherInTextContainer { .. } => {
write!(f, "Repeating matcher cannot be used in text container")
}
- SchemaError::BadListMatcher { .. } => {
- write!(f, "List node requires repeating matcher syntax")
- }
+
SchemaError::InvalidMatcherExtras { error, .. } => {
write!(f, "Invalid matcher extras: {}", error)
}
@@ -344,6 +340,16 @@ pub enum SchemaViolationError {
kind: NodeContentMismatchKind,
},
+ /// Not enough nodes for a repeating paragraph.
+ NotEnoughNodesForRepeatingParagraph {
+ schema_index: usize,
+ input_index: usize,
+ /// Expected number of children from schema.
+ expected: ChildrenLengthRange,
+ /// Actual number of children in input.
+ actual: usize,
+ },
+
/// Matcher appears in list context without repetition syntax.
///
/// List nodes require matchers to use `{min,max}` syntax.
@@ -357,7 +363,7 @@ pub enum SchemaViolationError {
schema_index: usize,
input_index: usize,
/// Expected number of children from schema.
- expected: ChildrenCount,
+ expected: ChildrenLengthRange, // min, max
/// Actual number of children in input.
actual: usize,
},
@@ -390,11 +396,58 @@ pub enum SchemaViolationError {
},
}
+/// `(min, max)` bounds on a child count; `min == max` denotes one exact count.
+#[derive(Debug, Clone, Hash, PartialEq, Eq)]
+pub struct ChildrenLengthRange(pub usize, pub usize);
+
+impl From<(usize, usize)> for ChildrenLengthRange {
+    fn from((min, max): (usize, usize)) -> Self {
+        ChildrenLengthRange(min, max)
+    }
+}
+
+impl From<usize> for ChildrenLengthRange {
+    fn from(min: usize) -> Self {
+        ChildrenLengthRange(min, min)
+    }
+}
+
+impl ChildrenLengthRange {
+    /// Build a range from optional bounds: a missing min becomes 0, a missing max becomes min.
+    // NOTE(review): a missing max therefore displays as "exactly {min}" — confirm that is intended.
+    pub fn from_optional_bounds(min: Option<usize>, max: Option<usize>) -> Self {
+        let min = min.unwrap_or(0);
+        ChildrenLengthRange(min, max.unwrap_or(min))
+    }
+
+    /// Build a range from a matcher's extras.
+    pub fn from_matcher_extras(extras: &MatcherExtras) -> Self {
+        ChildrenLengthRange::from_optional_bounds(extras.min_items(), extras.max_items())
+    }
+}
+
+impl PartialEq<usize> for ChildrenLengthRange {
+    fn eq(&self, other: &usize) -> bool {
+        self.0 == *other && self.1 == *other
+    }
+}
+
+impl std::fmt::Display for ChildrenLengthRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            ChildrenLengthRange(min, max) if min == max => write!(f, "exactly {}", min),
+            ChildrenLengthRange(min, max) => write!(f, "between {} and {}", min, max),
+        }
+    }
+}
+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum MalformedStructureKind {
MissingListItemContent,
HadExtraListItem,
MismatchingTableCells,
+ SchemaHasChildInputDoesnt,
+ InputHasChildSchemaDoesnt,
}
impl fmt::Display for SchemaViolationError {
@@ -413,6 +466,11 @@ impl fmt::Display for SchemaViolationError {
} => {
write!(f, "Expected {} '{}', found '{}'", kind, expected, actual)
}
+ SchemaViolationError::NotEnoughNodesForRepeatingParagraph {
+ expected, actual, ..
+ } => {
+ write!(f, "Expected {} children, found {}", expected, actual)
+ }
SchemaViolationError::NonRepeatingMatcherInListContext { .. } => {
write!(f, "Non-repeating matcher used in list context")
}
@@ -442,34 +500,6 @@ impl fmt::Display for SchemaViolationError {
}
}
-#[derive(Debug, Clone, Hash, PartialEq, Eq)]
-pub enum ChildrenCount {
- SpecificCount(usize),
- Range { min: usize, max: Option<usize> },
-}
-
-impl fmt::Display for ChildrenCount {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match self {
- ChildrenCount::SpecificCount(count) => write!(f, "{}", count),
- ChildrenCount::Range { min, max } => match max {
- Some(max_val) => write!(f, "between {} and {}", min, max_val),
- None => write!(f, "at least {}", min),
- },
- }
- }
-}
-
-impl ChildrenCount {
- pub fn from_specific(count: usize) -> Self {
- ChildrenCount::SpecificCount(count)
- }
-
- pub fn from_range(min: usize, max: Option<usize>) -> Self {
- ChildrenCount::Range { min, max }
- }
-}
-
/// Errors that occur during pretty-printing of validation errors.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum PrettyPrintError {
@@ -583,6 +613,27 @@ fn validation_error_to_ariadne(
)
.finish()
}
+ SchemaViolationError::NotEnoughNodesForRepeatingParagraph {
+ schema_index: _,
+ input_index,
+ expected,
+ actual,
+ } => {
+ let node = find_node_by_index(tree.root_node(), *input_index);
+ let node_range = node.start_byte()..node.end_byte();
+
+ Report::build(ReportKind::Error, (filename, node_range.clone()))
+ .with_message("Not enough nodes for repeating paragraph")
+ .with_label(
+ Label::new((filename, node_range))
+ .with_message(format!(
+ "Expected {} children but found {}.",
+ expected, actual
+ ))
+ .with_color(Color::Red),
+ )
+ .finish()
+ }
SchemaViolationError::NonRepeatingMatcherInListContext {
schema_index,
input_index,
@@ -595,27 +646,27 @@ fn validation_error_to_ariadne(
let input_range = input_node.start_byte()..input_node.end_byte();
Report::build(ReportKind::Error, (filename, input_range.clone()))
- .with_message("Non-repeating matcher in repeating context")
- .with_label(
- Label::new((filename, input_range))
- .with_message(
- "This input corresponds to a list node in the schema"
- )
- .with_color(Color::Blue),
- )
- .with_label(
- Label::new((filename, schema_range))
- .with_message(format!(
- "This matcher is in a list context but is not marked as repeating: '{}'",
- schema_content
- ))
- .with_color(Color::Red),
- )
- .with_help(r#"
+ .with_message("Non-repeating matcher in repeating context")
+ .with_label(
+ Label::new((filename, input_range))
+ .with_message(
+ "This input corresponds to a list node in the schema"
+ )
+ .with_color(Color::Blue),
+ )
+ .with_label(
+ Label::new((filename, schema_range))
+ .with_message(format!(
+ "This matcher is in a list context but is not marked as repeating: '{}'",
+ schema_content
+ ))
+ .with_color(Color::Red),
+ )
+ .with_help(r#"
You can mark a list node as repeating by adding a '{,} directly after the matcher, like
- `myLabel:/foo/`{1,12}
"#)
- .finish()
+ .finish()
}
SchemaViolationError::ChildrenLengthMismatch {
schema_index: _,
@@ -640,7 +691,7 @@ You can mark a list node as repeating by adding a '{,} dir
if parent.kind() == "list_item" {
report = report.with_help(
"If you want to allow any number of list items, use the {min,max} syntax \
- (e.g., `item:/pattern/`{1,} or `item:/pattern/`{0,})",
+ (e.g., `item:/pattern/`{1,} or `item:/pattern/`{0,})",
);
}
@@ -655,26 +706,26 @@ You can mark a list node as repeating by adding a '{,} dir
let node_range = node.start_byte()..node.end_byte();
Report::build(ReportKind::Error, (filename, node_range.clone()))
- .with_message("Nested list exceeds maximum depth")
- .with_label(
- Label::new((filename, node_range))
- .with_message(format!(
- "List nesting exceeds maximum depth of {} level(s).",
- max_depth,
- ))
- .with_color(Color::Red),
- )
- .with_help(
- "For schemas like:\n\
- - `num1:/\\d/`{1,}\n\
- \u{20} - `num2:/\\d/`{1,}{1,}\n\
- \n\
- You may need to adjust the repetition for the first matcher\n\
- to allow for the depth of the following ones. For example, you could\n\
- make that `num1:/\\d/`{1,}{1,}{1,} to allow for three levels of nesting (the one \
- below it, and the two allowed below that).",
- )
- .finish()
+ .with_message("Nested list exceeds maximum depth")
+ .with_label(
+ Label::new((filename, node_range))
+ .with_message(format!(
+ "List nesting exceeds maximum depth of {} level(s).",
+ max_depth,
+ ))
+ .with_color(Color::Red),
+ )
+ .with_help(
+ "For schemas like:\n\
+ - `num1:/\\d/`{1,}\n\
+ \u{20} - `num2:/\\d/`{1,}{1,}\n\
+ \n\
+ You may need to adjust the repetition for the first matcher\n\
+ to allow for the depth of the following ones. For example, you could\n\
+ make that `num1:/\\d/`{1,}{1,}{1,} to allow for three levels of nesting (the one \
+ below it, and the two allowed below that).",
+ )
+ .finish()
}
SchemaViolationError::WrongListCount {
schema_index,
@@ -711,8 +762,8 @@ You can mark a list node as repeating by adding a '{,} dir
)
.with_help(
"The number of items in `matcher`{1,2} syntax refers to the number of \
- entries at the level of that matcher (deeper items are not included in \
- that count).",
+ entries at the level of that matcher (deeper items are not included in \
+ that count).",
)
.finish()
}
@@ -770,25 +821,6 @@ You can mark a list node as repeating by adding a '{,} dir
.with_help("Text containers like paragraphs and headings cannot contain repeating matchers. Use repetition syntax only with list items.")
.finish()
}
- SchemaError::BadListMatcher { schema_index } => {
- let schema_node = find_node_by_index(tree.root_node(), *schema_index);
- let schema_content =
- node_content_by_index(tree.root_node(), *schema_index, source_content)?;
- let schema_range = schema_node.start_byte()..schema_node.end_byte();
-
- Report::build(ReportKind::Error, (filename, schema_range.clone()))
- .with_message("Bad list matcher")
- .with_label(
- Label::new((filename, schema_range))
- .with_message(format!(
- "No matchers found in children of list node: '{}'",
- schema_content
- ))
- .with_color(Color::Red),
- )
- .with_help("List nodes require repeating matcher syntax like `label:/pattern/`{1,}")
- .finish()
- }
SchemaError::UnclosedMatcher { schema_index } => {
let schema_node = find_node_by_index(tree.root_node(), *schema_index);
let schema_range = schema_node.start_byte()..schema_node.end_byte();
@@ -926,7 +958,7 @@ fn node_content_by_index<'a>(
#[cfg(test)]
mod tests {
- use crate::mdschema::validator::ts_utils::new_markdown_parser;
+ use crate::mdschema::validation::ts_utils::new_markdown_parser;
use super::*;
diff --git a/src/mdschema/validator/matcher/matcher.rs b/src/mdschema/validation/matchers/matcher.rs
similarity index 78%
rename from src/mdschema/validator/matcher/matcher.rs
rename to src/mdschema/validation/matchers/matcher.rs
index 8febb61..d9793d7 100644
--- a/src/mdschema/validator/matcher/matcher.rs
+++ b/src/mdschema/validation/matchers/matcher.rs
@@ -1,19 +1,22 @@
#![allow(dead_code)]
-use crate::{invariant_violation, mdschema::validator::matcher::matcher_extras::MatcherExtras};
+use crate::{invariant_violation, mdschema::validation::matchers::matcher_extras::MatcherExtras};
use core::fmt;
use regex::Regex;
use std::{collections::HashSet, sync::LazyLock};
use tree_sitter::TreeCursor;
-use crate::mdschema::validator::{
- matcher::matcher_extras::{MatcherExtrasError, partition_at_special_chars},
- ts_types::is_text_node,
+use crate::mdschema::validation::{
+ matchers::matcher_extras::{MatcherExtrasError, partition_at_special_chars},
+ ts_types::*,
ts_utils::{get_next_node, get_node_and_next_node, get_node_text},
};
-static REGEX_MATCHER_PATTERN: LazyLock<Regex> =
- LazyLock::new(|| Regex::new(r"^(((?P<id>[a-zA-Z0-9-_]+)):)?\/(?P<regex>.+?)\/").unwrap());
+static ID_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9-_]+$").unwrap());
+
+static REGEX_MATCHER_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
+ Regex::new(r"^(?:(?P<id_with_regex>[a-zA-Z0-9-_]+):)?(?:\/(?P<regex>.+?)\/|(?P<bare_id>[a-zA-Z0-9-_]+))$").unwrap()
+});
static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{(\d*),(\d*)\}").unwrap());
@@ -90,7 +93,7 @@ fn extract_item_count_limits(text: &str) -> (Option, Option, bool)
pub struct Matcher {
id: Option,
/// A compiled regex for the pattern.
- pattern: MatcherType,
+ kind: MatcherKind,
/// Extra flags, which we receive via extra text that corresponds to the matcher
flags: HashSet,
/// Extra configuration options
@@ -100,13 +103,27 @@ pub struct Matcher {
}
#[derive(Debug, Clone)]
-pub struct MatcherType {
- regex: Regex,
+pub enum MatcherKind {
+ Regex(Regex),
+ All,
+}
+
+impl MatcherKind {
+ pub fn from_regex(regex: Regex) -> Self {
+ MatcherKind::Regex(regex)
+ }
+
+ pub fn all() -> Self {
+ MatcherKind::All
+ }
}
-impl fmt::Display for MatcherType {
+impl fmt::Display for MatcherKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "{}", self.regex.as_str())
+ match self {
+ MatcherKind::Regex(regex) => write!(f, "{}", regex.as_str()),
+ MatcherKind::All => write!(f, "all"),
+ }
}
}
@@ -127,14 +144,14 @@ impl Matcher {
pub fn new(
id: Option,
flags: HashSet,
- pattern: MatcherType,
+        kind: MatcherKind,
         extras: MatcherExtras,
         original_str_len: usize,
     ) -> Self {
         Matcher {
             id,
             flags,
-            pattern,
+            kind,
extras,
original_str_len,
}
@@ -142,7 +159,7 @@ impl Matcher {
pub fn new_with_empty_flags(
id: Option,
- pattern: MatcherType,
+ pattern: MatcherKind,
extras: MatcherExtras,
original_str_len: usize,
) -> Self {
@@ -154,9 +171,7 @@ impl Matcher {
///
/// # Arguments
/// * `pattern` - The pattern string within the matcher codeblock.
- /// * `after_str` - Optional extras string following the pattern. This must
- /// have a sequence of valid matcher extras, only followed by additional
- /// text if there is a space in between.
+ /// * `after_str` - Optional extras string following the pattern. This must have a sequence of valid matcher extras, only followed by additional text if there is a space in between.
pub fn try_from_pattern_and_suffix_str(
pattern_str: &str,
after_str: Option<&str>,
@@ -173,10 +188,10 @@ impl Matcher {
}
let (id, pattern) = match captures {
- Some(caps) => extract_id_and_pattern(&caps, &pattern_str)?,
+ Some(caps) => extract_id_and_pattern(&caps, pattern_str)?,
None => {
return Err(MatcherError::MatcherInteriorRegexInvalid(format!(
- "Expected format: 'id:/regex/', got {}", // TODO: don't hard code what we expect
+ "Expected format: 'id:/regex/' or 'id', got {}",
pattern_str
)));
}
@@ -210,10 +225,15 @@ impl Matcher {
schema_cursor: &TreeCursor,
schema_str: &str,
) -> Result {
+ // #[cfg(feature = "invariant_violations")]
+ // if !is_inline_code_node(&schema_cursor.node()) {
+ // invariant_violation!("expected inline code node for extracting a matcher");
+ // }
+
let pattern_str = get_node_text(&schema_cursor.node(), schema_str);
let next_node = get_next_node(schema_cursor);
let extras_str = next_node
- .filter(|n| is_text_node(&n)) // don't bother if not text; extras must be in text
+ .filter(|n| is_text_node(n)) // don't bother if not text; extras must be in text
.map(|n| get_node_text(&n, schema_str))
.and_then(|n| partition_at_special_chars(n).map(|(extras, _)| extras));
@@ -222,9 +242,12 @@ impl Matcher {
/// Get an actual match string for a given text, if it matches.
pub fn match_str<'a>(&self, text: &'a str) -> Option<&'a str> {
- match self.pattern.regex.find(text) {
- Some(mat) => Some(&text[mat.start()..mat.end()]),
- None => None,
+ match &self.kind {
+ MatcherKind::Regex(regex) => {
+ let mat = regex.find(text)?;
+ Some(&text[mat.start()..mat.end()])
+ }
+ MatcherKind::All => Some(text),
}
}
@@ -235,7 +258,7 @@ impl Matcher {
/// The ID of the matcher. This is the key in the final JSON.
pub fn id(&self) -> Option<&str> {
- self.id.as_ref().map(|s| s.as_str())
+ self.id.as_deref()
}
/// Get a reference to the extras
@@ -244,8 +267,8 @@ impl Matcher {
}
/// Get a reference to the pattern
- pub fn pattern(&self) -> &MatcherType {
- &self.pattern
+ pub fn pattern(&self) -> &MatcherKind {
+ &self.kind
}
/// The original string length of the matcher including the `s.
@@ -267,11 +290,15 @@ impl Matcher {
_ => true,
}
}
+
+ pub fn kind(&self) -> &MatcherKind {
+ &self.kind
+ }
}
impl PartialEq for Matcher {
fn eq(&self, other: &Self) -> bool {
- self.id == other.id && format!("{}", self.pattern) == format!("{}", other.pattern)
+ self.id == other.id && format!("{}", self.kind) == format!("{}", other.kind)
}
}
@@ -279,40 +306,53 @@ impl PartialEq for Matcher {
fn extract_id_and_pattern(
captures: &regex::Captures,
pattern: &str,
-) -> Result<(Option<String>, MatcherType), MatcherError> {
- let id = captures.name("id").map(|m| m.as_str().to_string());
+) -> Result<(Option<String>, MatcherKind), MatcherError> {
+ // Check if we have a bare ID (e.g., `word`)
+ if let Some(bare_id) = captures.name("bare_id") {
+ let id = bare_id.as_str().to_string();
+ return Ok((Some(id), MatcherKind::all()));
+ }
+
+ // Otherwise, we have a regex pattern (e.g., `id:/regex/` or `/regex/`)
+ let id = captures
+ .name("id_with_regex")
+ .map(|m| m.as_str().to_string());
let regex_pattern = captures
.name("regex")
.map(|m| m.as_str().to_string())
.ok_or_else(|| {
MatcherError::MatcherInteriorRegexInvalid(format!(
- "Expected format: 'id:/regex/', got {}",
+ "Expected format: 'id:/regex/' or 'id', got {}",
pattern
))
})?;
- let matcher = MatcherType {
- regex: Regex::new(&format!("^{}", regex_pattern)).map_err(|e| {
+ // Create a regex matcher from the pattern
+ let matcher =
+ MatcherKind::from_regex(Regex::new(&format!("^{}", regex_pattern)).map_err(|e| {
MatcherError::MatcherInteriorRegexInvalid(format!("Invalid regex pattern: {}", e))
- })?,
- };
+ })?);
Ok((id, matcher))
}
impl fmt::Display for Matcher {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- let regex_str = self.pattern.regex.as_str();
- // The regex is stored as "^", so remove the leading ^
- let pattern_str = if regex_str.starts_with('^') {
- ®ex_str[1..]
- } else {
- regex_str
- };
-
- match &self.id {
- Some(id) => write!(f, "{}:/{}/", id, pattern_str),
- None => write!(f, "/{}/", pattern_str),
+ match &self.kind {
+ MatcherKind::Regex(regex) => {
+ let regex_str = regex.as_str();
+ // The regex is stored as "^", so remove the leading ^
+ let pattern_str = regex_str.strip_prefix('^').unwrap_or(regex_str);
+
+ match &self.id {
+ Some(id) => write!(f, "{}:/{}/", id, pattern_str),
+ None => write!(f, "/{}/", pattern_str),
+ }
+ }
+ MatcherKind::All => match &self.id {
+ Some(id) => write!(f, "{}:/all/", id),
+ None => write!(f, "/all/"),
+ },
}
}
}
@@ -377,16 +417,30 @@ pub fn extract_text_matcher(cursor: &TreeCursor, str: &str) -> Result {
+ assert_eq!(matcher.id, Some("word".to_string()));
+ assert_eq!(matcher.match_str("hello world"), Some("hello world"));
+ assert_eq!(matcher.match_str("1234"), Some("1234"));
+ assert_eq!(matcher.match_str("!@#$"), Some("!@#$"));
+ }
+ kind => panic!("Unexpected matcher kind: {:?}", kind),
+ }
+ }
+
+ #[test]
+ fn test_matcher_creation_and_matching_regex() {
let matcher = Matcher::try_from_pattern_and_suffix_str("`word:/\\w+/`", None).unwrap();
assert_eq!(matcher.id, Some("word".to_string()));
assert_eq!(matcher.match_str("hello world"), Some("hello"));
@@ -394,10 +448,36 @@ mod tests {
assert_eq!(matcher.match_str("!@#$"), None);
}
+ #[test]
+ fn test_all_matcher_matches_everything() {
+ let matcher = Matcher::try_from_pattern_and_suffix_str("`my_id`", None).unwrap();
+ assert_eq!(matcher.id, Some("my_id".to_string()));
+
+ // All matcher acts as identity function - always matches and returns exactly what was passed
+ assert_eq!(matcher.match_str("hello"), Some("hello"));
+ assert_eq!(matcher.match_str("test123"), Some("test123"));
+ assert_eq!(matcher.match_str("under_score"), Some("under_score"));
+ assert_eq!(matcher.match_str("MixedCase123"), Some("MixedCase123"));
+
+ // Should match special characters too - identity function
+ assert_eq!(matcher.match_str("@*&^R"), Some("@*&^R"));
+ assert_eq!(matcher.match_str("!test"), Some("!test"));
+ assert_eq!(matcher.match_str("-dash"), Some("-dash"));
+
+ // Matches everything including spaces and special characters
+ assert_eq!(matcher.match_str("valid-later"), Some("valid-later"));
+ assert_eq!(matcher.match_str("test@symbol"), Some("test@symbol"));
+ assert_eq!(
+ matcher.match_str("anything at all!"),
+ Some("anything at all!")
+ );
+ }
+
#[test]
fn test_matcher_invalid_pattern() {
- // Test error handling for invalid pattern using try_from_pattern_and_suffix_str
- let result = Matcher::try_from_pattern_and_suffix_str("`invalid_pattern`", None);
+ // Test error handling for truly invalid pattern (invalid chars for ID, not a regex)
+ let result =
+ Matcher::try_from_pattern_and_suffix_str("`invalid pattern with spaces`", None);
assert!(result.is_err());
match result.as_ref().unwrap_err() {
MatcherError::MatcherInteriorRegexInvalid(_) => {
@@ -416,7 +496,7 @@ mod tests {
// rather than there being wrong ones. We probably want to change this
// eventually though.
let result = Matcher::try_from_pattern_and_suffix_str("`name:/test/`", Some("bullshit"));
- assert!(!result.is_err()); // TODO: for now
+ assert!(result.is_ok()); // TODO: for now
}
#[test]
diff --git a/src/mdschema/validator/matcher/matcher_extras.rs b/src/mdschema/validation/matchers/matcher_extras.rs
similarity index 95%
rename from src/mdschema/validator/matcher/matcher_extras.rs
rename to src/mdschema/validation/matchers/matcher_extras.rs
index 61e936b..4a8f4cc 100644
--- a/src/mdschema/validator/matcher/matcher_extras.rs
+++ b/src/mdschema/validation/matchers/matcher_extras.rs
@@ -3,7 +3,7 @@
use regex::Regex;
use std::sync::LazyLock;
-use crate::mdschema::validator::matcher::matcher::LITERAL_INDICATOR;
+use crate::mdschema::validation::matchers::matcher::LITERAL_INDICATOR;
static RANGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{(\d*),(\d*)\}").unwrap());
@@ -47,7 +47,7 @@ pub fn has_literal_within_extras(text: &str) -> bool {
&& text.len() != 1
&& !{
match partition_at_special_chars(&text[1..]) {
- Some((extras, _after)) => extras == "",
+ Some((extras, _after)) => extras.is_empty(),
None => false,
}
}
@@ -98,7 +98,7 @@ impl std::fmt::Display for MatcherExtrasError {
/// # Examples
///
/// ```
-/// use mdvalidate::mdschema::validator::matcher::matcher_extras::MatcherExtras;
+/// use mdvalidate::mdschema::validation::matchers::matcher_extras::MatcherExtras;
///
/// // Matcher with repeat limits: `name:/\w+/`{2,5}
/// let extras = MatcherExtras::try_new(Some("{2,5}")).unwrap();
@@ -128,11 +128,10 @@ impl MatcherExtras {
/// * `text` - Optional text following the matcher code block
pub fn try_new(text: Option<&str>) -> Result {
// Check if text matches the pattern, if text is provided
- if let Some(text) = text {
- if !MATCHERS_EXTRA_PATTERN.is_match(text) {
+ if let Some(text) = text
+ && !MATCHERS_EXTRA_PATTERN.is_match(text) {
return Err(MatcherExtrasError::MatcherExtrasInvalid);
}
- }
Ok(match text {
Some(text) => {
@@ -201,11 +200,21 @@ impl MatcherExtras {
self.min_items
}
+ /// Return minimum items or the provided default.
+ pub fn min_items_or(&self, default: usize) -> usize {
+ self.min_items.unwrap_or(default)
+ }
+
/// Return optional maximum number of items at this list level
pub fn max_items(&self) -> Option<usize> {
self.max_items
}
+ /// Return maximum items or the provided default.
+ pub fn max_items_or(&self, default: usize) -> usize {
+ self.max_items.unwrap_or(default)
+ }
+
/// Whether min/max constraints were specified
pub fn had_min_max(&self) -> bool {
self.had_min_max
diff --git a/src/mdschema/validator/matcher/mod.rs b/src/mdschema/validation/matchers/mod.rs
similarity index 100%
rename from src/mdschema/validator/matcher/mod.rs
rename to src/mdschema/validation/matchers/mod.rs
diff --git a/src/mdschema/validator/mod.rs b/src/mdschema/validation/mod.rs
similarity index 77%
rename from src/mdschema/validator/mod.rs
rename to src/mdschema/validation/mod.rs
index cfeb164..7e40e37 100644
--- a/src/mdschema/validator/mod.rs
+++ b/src/mdschema/validation/mod.rs
@@ -1,7 +1,7 @@
pub mod errors;
-pub mod matcher;
+pub mod matchers;
pub(crate) mod node_pos_pair;
-pub(crate) mod node_walker;
+pub(crate) mod walkers;
pub(crate) mod ts_types;
pub(crate) mod ts_utils;
mod utils;
diff --git a/src/mdschema/validator/node_pos_pair.rs b/src/mdschema/validation/node_pos_pair.rs
similarity index 94%
rename from src/mdschema/validator/node_pos_pair.rs
rename to src/mdschema/validation/node_pos_pair.rs
index c5ccda6..e71a782 100644
--- a/src/mdschema/validator/node_pos_pair.rs
+++ b/src/mdschema/validation/node_pos_pair.rs
@@ -28,7 +28,7 @@ impl NodePosPair {
}
/// Convert the `NodePosPair` to a tuple of schema and input indexes.
- pub fn to_pos(&self) -> (usize, usize) {
+ pub fn as_pos(&self) -> (usize, usize) {
(self.schema_index, self.input_index)
}
@@ -45,7 +45,7 @@ impl NodePosPair {
schema_cursor: &mut TreeCursor,
input_cursor: &mut TreeCursor,
) {
- let (schema_pos, input_pos) = self.to_pos();
+ let (schema_pos, input_pos) = self.as_pos();
schema_cursor.goto_descendant(schema_pos);
input_cursor.goto_descendant(input_pos);
diff --git a/src/mdschema/validator/ts_types.rs b/src/mdschema/validation/ts_types.rs
similarity index 94%
rename from src/mdschema/validator/ts_types.rs
rename to src/mdschema/validation/ts_types.rs
index f617d36..7af5ba2 100644
--- a/src/mdschema/validator/ts_types.rs
+++ b/src/mdschema/validation/ts_types.rs
@@ -132,6 +132,12 @@ node_kind_pair!(
"Check if both nodes are tables.",
["table"]
);
+node_kind_pair!(
+ is_table_data_row_node,
+ both_are_table_data_rows,
+ "Check if both nodes are table data rows.",
+ ["table_data_row"]
+);
node_kind_pair!(
is_table_cell_node,
both_are_table_cells,
@@ -189,13 +195,9 @@ node_predicate_pair!(
/// Check if both nodes are top-level nodes (document or heading).
pub fn both_are_matching_top_level_nodes(schema_node: &Node, input_node: &Node) -> bool {
- if schema_node.kind() != input_node.kind() {
- return false;
- }
-
match schema_node.kind() {
- "document" => true,
- "atx_heading" => true,
+ _ if schema_node.kind() != input_node.kind() => false,
+ "document" | "atx_heading" => true,
_ => false,
}
}
diff --git a/src/mdschema/validator/ts_utils.rs b/src/mdschema/validation/ts_utils.rs
similarity index 82%
rename from src/mdschema/validator/ts_utils.rs
rename to src/mdschema/validation/ts_utils.rs
index 4045bd9..91eef0e 100644
--- a/src/mdschema/validator/ts_utils.rs
+++ b/src/mdschema/validation/ts_utils.rs
@@ -4,9 +4,9 @@ use crate::invariant_violation;
use tree_sitter::{Node, Parser, Tree, TreeCursor};
use tree_sitter_markdown::language;
-use crate::mdschema::validator::{errors::ValidationError, validator::ValidatorState};
#[cfg(feature = "invariant_violations")]
-use crate::mdschema::validator::ts_types::is_marker_node;
+use crate::mdschema::validation::ts_types::*;
+use crate::mdschema::validation::{errors::ValidationError, validator::ValidatorState};
use regex::Regex;
use std::sync::LazyLock;
@@ -14,7 +14,13 @@ use std::sync::LazyLock;
/// Extract text from a tree-sitter node using the provided source string.
pub fn get_node_text<'a, S: Into<&'a str>>(node: &Node, src: S) -> &'a str {
let src_ref = src.into();
- node.utf8_text(src_ref.as_bytes()).unwrap()
+    let node_str = node.utf8_text(src_ref.as_bytes()).unwrap();
+    use crate::mdschema::validation::ts_types::is_table_cell_node; // file-level glob is cfg-gated
+    if is_table_cell_node(node) || node.parent().is_some_and(|n| is_table_cell_node(&n)) {
+        node_str.trim()
+    } else {
+        node_str
+    }
}
/// Ordered lists use numbers followed by period . or right paren )
@@ -187,90 +193,97 @@ pub fn has_single_code_child(schema_cursor: &TreeCursor) -> bool {
code_child_count == 1
}
-/// Extract the language and body of a codeblock.
-///
-/// # Arguments
-///
-/// * `cursor`: The cursor pointing to the codeblock node.
-/// * `src`: The source text of the document.
-///
-/// # Returns
-///
-/// An `Option` containing:
-/// - The optional language tuple: `(language_string, descendant_index)` if the language text is present
-/// - The body tuple: `(body_string, descendant_index)` of the code content
-/// Where `descendant_index` is the index of the descendant node that contains the language or body text.
-///
-/// Returns `None` if the codeblock is invalid or it isn't a codeblock to begin with.
-pub fn extract_codeblock_contents(
- cursor: &TreeCursor,
- src: &str,
-) -> Result