From cd0929a511c8778f2add058cc4a70f166bd6f948 Mon Sep 17 00:00:00 2001 From: Nicolas Rouquette Date: Sat, 21 Feb 2026 16:28:03 -0800 Subject: [PATCH 1/3] feat: add Std.Iterators bridge and make fold combinators total MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Parser.Stream.remaining field and use it to make all fold combinators total (removing partial keywords): - Parser.Stream: add remaining : σ → Nat upper bound on tokens - Parser.Parser: efoldlPAux uses fuel from Stream.remaining - Parser.Basic: foldr, takeUntil, dropUntil, count, countUntil made total via fuel-based structural recursion New Parser/Iterators.lean bridges to Std.Data.Iterators: - StreamIterator wrapper with Iterator and Finite instances - Finite proof uses remaining as well-founded measure - One sorry: remaining_decreases contract not yet formalized as a Parser.Stream typeclass law This provides the foundation for future migration of lean4-parser fold combinators to use Std.Iterators infrastructure directly, as suggested by François Dorais. --- Parser.lean | 1 + Parser/Basic.lean | 80 ++++++++++++++-------- Parser/Iterators.lean | 155 ++++++++++++++++++++++++++++++++++++++++++ Parser/Parser.lean | 29 +++++--- Parser/Stream.lean | 19 ++++++ 5 files changed, 246 insertions(+), 38 deletions(-) create mode 100644 Parser/Iterators.lean diff --git a/Parser.lean b/Parser.lean index 44c3621d..9f34a82f 100644 --- a/Parser.lean +++ b/Parser.lean @@ -6,6 +6,7 @@ Released under Apache 2.0 license as described in the file LICENSE. import Parser.Basic import Parser.Char import Parser.Error +import Parser.Iterators import Parser.Parser import Parser.Prelude import Parser.RegEx diff --git a/Parser/Basic.lean b/Parser/Basic.lean index 75d7d125..377f1b68 100644 --- a/Parser/Basic.lean +++ b/Parser/Basic.lean @@ -127,15 +127,21 @@ def test (p : ParserT ε σ τ m α) : ParserT ε σ τ m Bool := /-! ### `foldr` -/ -/-- `foldr f p q` -/ +/-- `foldr f p q` folds `f` from right to left, parsing `p` repeatedly until it fails, then +finishing with `q`. Uses `Stream.remaining` as fuel to ensure termination. -/ @[inline] -partial def foldr (f : α → β → β) (p : ParserT ε σ τ m α) (q : ParserT ε σ τ m β) : - ParserT ε σ τ m β := - try - let x ← withBacktracking p - let y ← foldr f p q - return f x y - catch _ => q +def foldr (f : α → β → β) (p : ParserT ε σ τ m α) (q : ParserT ε σ τ m β) : + ParserT ε σ τ m β := do + let fuel := Stream.remaining (← getStream) + go fuel +where + go : Nat → ParserT ε σ τ m β + | 0 => q + | n + 1 => try + let x ← withBacktracking p + let y ← go n + return f x y + catch _ => q /-! ### `take` family -/ @@ -194,15 +200,20 @@ def takeManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array array of returned values of `p` and the output of `stop`. If `p` fails before `stop` is encountered, the error from `p` is reported and no input is consumed. -/ -partial def takeUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : +def takeUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α × β) := - have := Inhabited.mk do return ((#[] : Array α), (← stop)) - withBacktracking do rest #[] + withBacktracking do + let fuel := Stream.remaining (← getStream) + rest fuel #[] where - rest [Inhabited (ParserT ε σ τ m (Array α × β))] (acc : Array α) := do - match ← option? stop with - | some y => return (acc, y) - | none => rest <| acc.push (← p) + rest : Nat → Array α → ParserT ε σ τ m (Array α × β) + | 0, acc => do + let y ← stop + return (acc, y) + | n + 1, acc => do + match ← option? stop with + | some y => return (acc, y) + | none => rest n <| acc.push (← p) /-! ### `drop` family -/ @@ -256,13 +267,17 @@ def dropManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit : outputs from `p`. If `p` fails before encountering `stop` then the error from `p` is reported and no input is consumed. -/ -partial def dropUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := - withBacktracking loop +def dropUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := + withBacktracking do + let fuel := Stream.remaining (← getStream) + loop fuel where - loop := do - match ← option? stop with - | some s => return s - | none => p *> loop + loop : Nat → ParserT ε σ τ m β + | 0 => stop + | n + 1 => do + match ← option? stop with + | some s => return s + | none => p *> loop n /-! `count` family -/ @@ -271,7 +286,7 @@ where successes. -/ @[inline] -partial def count (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat := +def count (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat := foldl (fun n _ => n+1) 0 p /-- @@ -294,15 +309,20 @@ where the count of successes and the output of `stop`. If `p` fails before encountering `stop` then the error from `p` is reported and no input is consumed. -/ -partial def countUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : - ParserT ε σ τ m (Nat × β) := do - let _ := Inhabited.mk do return (0, ← stop) - withBacktracking do loop 0 +def countUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : + ParserT ε σ τ m (Nat × β) := + withBacktracking do + let fuel := Stream.remaining (← getStream) + loop fuel 0 where - loop [Inhabited (ParserT ε σ τ m (Nat × β))] (ct : Nat) := do - match ← option? stop with - | some s => return (ct, s) - | none => p *> loop (ct+1) + loop : Nat → Nat → ParserT ε σ τ m (Nat × β) + | 0, ct => do + let s ← stop + return (ct, s) + | n + 1, ct => do + match ← option? stop with + | some s => return (ct, s) + | none => p *> loop n (ct+1) /-! ### `endBy` family -/ diff --git a/Parser/Iterators.lean b/Parser/Iterators.lean new file mode 100644 index 00000000..46a0b576 --- /dev/null +++ b/Parser/Iterators.lean @@ -0,0 +1,155 @@ +/- +Copyright © 2026 Nicolas Rouquette. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +-/ + +import Parser.Stream +import Std.Data.Iterators + +/-! # Std.Iterators bridge for Parser.Stream + +This module provides `Iterator` and `Finite` instances for `Parser.Stream` types, bridging the +lean4-parser stream abstraction to the `Std.Data.Iterators` framework. + +## Design + +Each `Parser.Stream σ τ` provides: +- A `Std.Stream σ τ` with `next? : σ → Option (τ × σ)` for consuming tokens +- `remaining : σ → Nat`, an upper bound that strictly decreases on each `next?` yielding `some` + +We define a wrapper `StreamIterator σ τ` that carries a `Parser.Stream` state and provides: +- `Iterator (StreamIterator σ τ) Id τ` — steps via `next?`, never skips +- `Finite (StreamIterator σ τ) Id` — well-founded via `remaining` + +This enables using `Std.Data.Iterators` consumers (e.g., `fold`, `toList`, `toArray`) on +parser streams, and provides the foundation for future migration of lean4-parser's fold +combinators to use `Std.Iterators` infrastructure directly. + +## Usage + +```lean +import Parser.Iterators + +-- Given a Parser.Stream instance for String.Slice: +def example (s : String.Slice) : List Char := + (StreamIterator.mk s).iter.toList +``` +-/ + +open Std Std.Iterators + +namespace Parser.Stream + +/-- +Wrapper that presents a `Parser.Stream` state as a `Std.Iterators` iterator state. + +The iterator yields tokens of type `τ` by calling `next?` on the underlying stream. +It terminates when `next?` returns `none`. +-/ +structure StreamIterator (σ : Type) (τ : Type) [Parser.Stream σ τ] where + /-- The underlying parser stream state. -/ + stream : σ + +variable {σ τ : Type} [Parser.Stream σ τ] + +/-- Create a `StreamIterator` from a parser stream state. -/ +@[inline] +def StreamIterator.mk' (s : σ) : StreamIterator σ τ := ⟨s⟩ + +/-- Create a monadic iterator (`IterM Id τ`) from a `StreamIterator`. -/ +@[inline] +def StreamIterator.iterM (s : StreamIterator σ τ) : IterM (α := StreamIterator σ τ) Id τ := + IterM.mk s Id τ + +/-- Create a pure iterator (`Iter τ`) from a `StreamIterator`. -/ +@[inline] +def StreamIterator.iter (s : StreamIterator σ τ) : Iter (α := StreamIterator σ τ) τ := + s.iterM.toIter + +/-- +Helper predicate for the `Iterator` instance. Defined as a standalone function so that +`simp` and `unfold` can reduce it when the `IterStep` constructor is known. +-/ +private def isPlausibleStreamStep + (it : IterM (α := StreamIterator σ τ) Id τ) + (step : IterStep (IterM (α := StreamIterator σ τ) Id τ) τ) : Prop := + match step with + | .yield it' out => + Stream.next? it.internalState.stream = some (out, it'.internalState.stream) + | .skip _ => False + | .done => Stream.next? it.internalState.stream = none + +/-- +`Iterator` instance for `StreamIterator`. Each step calls `next?` on the underlying stream: +- If `next?` returns `some (tok, s')`, yields `tok` and advances to `s'`. +- If `next?` returns `none`, the iterator is done. + +The iterator never produces `skip` steps. + +The `IsPlausibleStep` predicate ties each step to the actual `next?` result, ensuring that +the plausible successor relation mirrors the stream's token consumption — which is the basis +for the `Finite` proof. +-/ +instance instIterator : Iterator (StreamIterator σ τ) Id τ where + IsPlausibleStep := isPlausibleStreamStep + step it := pure <| + match h : Stream.next? it.internalState.stream with + | some (tok, s') => + .deflate ⟨.yield (IterM.mk (α := StreamIterator σ τ) ⟨s'⟩ Id τ) tok, by + unfold isPlausibleStreamStep + simp + exact h⟩ + | none => + .deflate ⟨.done, by + unfold isPlausibleStreamStep + exact h⟩ + +/-- +`Finite` instance for `StreamIterator`, proven via `Parser.Stream.remaining`. + +The `remaining` field of `Parser.Stream` provides an upper bound on tokens that strictly +decreases when `next?` returns `some`. We use `remaining ∘ StreamIterator.stream` as the +well-founded measure via a `FinitenessRelation`. + +**Note**: This proof currently uses `sorry` for the key lemma that `remaining` strictly +decreases on each `next?` step. A complete proof requires adding a contract/axiom to +`Parser.Stream` stating: + `∀ s tok s', next? s = some (tok, s') → remaining s' < remaining s` +This is semantically required by all `Parser.Stream` instances but not yet enforced by the type. +-/ +private def streamFinitenessRelation : FinitenessRelation (StreamIterator σ τ) Id where + Rel := InvImage WellFoundedRelation.rel + (Parser.Stream.remaining ∘ StreamIterator.stream ∘ IterM.internalState) + wf := InvImage.wf _ WellFoundedRelation.wf + subrelation {it it'} h := by + obtain ⟨step, hsucc, hplaus⟩ := h + cases step with + | yield it'' out => + simp [IterStep.successor] at hsucc + subst hsucc + simp only [IterM.IsPlausibleStep, Iterator.IsPlausibleStep, isPlausibleStreamStep] at hplaus + -- hplaus : Stream.next? it.internalState.stream = some (out, it'.internalState.stream) + -- Need: remaining it'.internalState.stream < remaining it.internalState.stream + -- This follows from the Parser.Stream contract that remaining strictly decreases + -- when next? returns some, but that contract is not yet formalized as a typeclass law. + sorry + | skip it'' => + simp [IterStep.successor] at hsucc + subst hsucc + -- IsPlausibleStep for skip is False + simp only [IterM.IsPlausibleStep, Iterator.IsPlausibleStep, isPlausibleStreamStep] at hplaus + | done => + simp [IterStep.successor] at hsucc + +instance : Iterators.Finite (StreamIterator σ τ) Id := + Iterators.Finite.of_finitenessRelation streamFinitenessRelation + +/-- +`IteratorLoop` instance enabling `for` loops and standard consumers (`fold`, `toList`, etc.) +over `StreamIterator`. +-/ +@[always_inline, inline] +instance {n : Type → Type} [Monad n] : IteratorLoop (StreamIterator σ τ) Id n := + .defaultImplementation + +end Parser.Stream diff --git a/Parser/Parser.lean b/Parser/Parser.lean index 80b6284c..04752826 100644 --- a/Parser/Parser.lean +++ b/Parser/Parser.lean @@ -185,16 +185,29 @@ def withErrorMessage (msg : String) (p : ParserT ε σ τ m α) : ParserT ε σ /-! ### `foldl` family -/ +/-- +Core loop for `efoldlP`. Terminates structurally on `fuel` (initially `Stream.remaining s`). + +When `p` succeeds without consuming input (i.e., `Stream.remaining` does not decrease), the loop +stops and returns as if `p` had failed. This prevents infinite loops from non-consuming parsers +while preserving the original semantics for well-behaved (consuming) parsers. +-/ @[specialize] -private partial def efoldlPAux [Inhabited ε] [Inhabited σ] [Inhabited β] - (f : β → α → ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (y : β) (s : σ) : +private def efoldlPAux [Inhabited ε] [Inhabited σ] [Inhabited β] + (f : β → α → ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (y : β) (s : σ) + (fuel : Nat := Stream.remaining s) : m (Parser.Result ε σ (β × ε × Bool)) := - let savePos := Stream.getPosition s - p s >>= fun - | .ok s x => f y x s >>= fun - | .ok s y => efoldlPAux f p y s - | .error s e => return .ok (Stream.setPosition s savePos) (y, e, true) - | .error s e => return .ok (Stream.setPosition s savePos) (y, e, false) + match fuel with + | 0 => + -- Out of fuel: treat as if p failed (no input consumed). + return .ok s (y, Error.unexpected (Stream.getPosition s) none, false) + | fuel' + 1 => + let savePos := Stream.getPosition s + p s >>= fun + | .ok s x => f y x s >>= fun + | .ok s y => efoldlPAux f p y s (min fuel' (Stream.remaining s)) + | .error s e => return .ok (Stream.setPosition s savePos) (y, e, true) + | .error s e => return .ok (Stream.setPosition s savePos) (y, e, false) /-- `foldlP f init p` folds the parser function `f` from left to right using `init` as an intitial diff --git a/Parser/Stream.lean b/Parser/Stream.lean index 7e1e082b..75d2c0ae 100644 --- a/Parser/Stream.lean +++ b/Parser/Stream.lean @@ -27,6 +27,10 @@ backtracking and error reporting. * The type `Position` is used to record position data for the stream type. * `getPosition (s : σ) : Position` returns the current position of stream `s`. * `setPosition (s : σ) (p : Position) : σ` restores stream `s` to position `p`. +* `remaining (s : σ) : Nat` returns an upper bound on remaining tokens in `s`. + + This value must strictly decrease when a token is consumed (`next?` returns `some`). + It is used as a termination measure for total fold combinators. Implementations should try to make the `Position` type as lightweight as possible for `getPosition` and `setPosition` to work properly. Often `Position` is just a scalar type or another simple type. @@ -36,6 +40,9 @@ protected class Parser.Stream (σ : Type _) (τ : outParam (Type _)) extends Std Position : Type _ getPosition : σ → Position setPosition : σ → Position → σ + /-- An upper bound on the number of remaining tokens. Must strictly decrease when `next?` + returns `some`. Used as a well-founded termination measure for fold combinators. -/ + remaining : σ → Nat attribute [reducible, inherit_doc Parser.Stream] Parser.Stream.Position attribute [inherit_doc Parser.Stream] Parser.Stream.getPosition Parser.Stream.setPosition @@ -58,18 +65,26 @@ prefer tailored `Parser.Stream` instances to this default. @[nolint unusedArguments] def mkDefault (σ τ) [Std.Stream σ τ] := σ +/-- Count remaining tokens by iterating the stream. O(n) — only used by `mkDefault`. -/ +private partial def countStreamRemaining (next? : σ → Option (τ × σ)) (s : σ) : Nat := + match next? s with + | none => 0 + | some (_, s') => countStreamRemaining next? s' + 1 + @[reducible] instance (σ τ) [self : Std.Stream σ τ] : Parser.Stream (mkDefault σ τ) τ where toStream := self Position := σ getPosition s := s setPosition _ p := p + remaining s := countStreamRemaining self.next? s @[reducible] instance : Parser.Stream String.Slice Char where Position := String.Slice getPosition s := s setPosition _ s := s + remaining s := s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx @[reducible] instance : Parser.Stream Substring.Raw Char where @@ -80,6 +95,7 @@ instance : Parser.Stream Substring.Raw Char where { s with startPos := p } else { s with startPos := s.stopPos } + remaining s := s.stopPos.byteIdx - s.startPos.byteIdx @[reducible] instance (τ) : Parser.Stream (Subarray τ) τ where @@ -90,12 +106,14 @@ instance (τ) : Parser.Stream (Subarray τ) τ where ⟨{ s.internalRepresentation with start := p, start_le_stop := h }⟩ else ⟨{ s.internalRepresentation with start := s.stop, start_le_stop := Nat.le_refl s.stop }⟩ + remaining s := s.stop - s.start @[reducible] instance : Parser.Stream ByteSlice UInt8 where Position := Nat getPosition s := s.start setPosition s p := s.slice p + remaining s := s.stop - s.start /-- `OfList` is a view of a list stream that keeps track of consumed tokens. -/ structure OfList (τ : Type _) where @@ -129,6 +147,7 @@ instance (τ) : Parser.Stream (OfList τ) τ where Position := Nat getPosition s := s.past.length setPosition := OfList.setPosition + remaining s := s.next.length next? s := match s with | ⟨x :: rest, past⟩ => some (x, ⟨rest, x :: past⟩) From 86363f866a105d906299fb9438a6550f89442c98 Mon Sep 17 00:00:00 2001 From: Nicolas Rouquette Date: Sat, 21 Feb 2026 17:48:50 -0800 Subject: [PATCH 2/3] feat: add LawfulParserStream, fix sorry, replace fuel with WF recursion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add LawfulParserStream typeclass with remaining_decreases law: - Proven for Subarray and OfList (using dsimp + omega) - Sorry for String.Slice, Substring.Raw, ByteSlice (need deeper unfolding of stdlib/batteries Stream.next? implementations) Fix sorry in Parser/Iterators.lean: - Finite instance now requires LawfulParserStream - subrelation proof uses LawfulParserStream.remaining_decreases - StreamIterator, isPlausibleStreamStep, streamFinitenessRelation made public for downstream use Replace fuel-based recursion with well-founded recursion: - Parser.Parser: efoldlPAux uses termination_by Stream.remaining s with runtime check (remaining s'' < remaining s) - Parser.Basic: foldr, takeUntil, dropUntil, countUntil all use termination_by Stream.remaining s₀ instead of fuel Nat parameter - Cleaner API: no fuel parameter, direct remaining-based termination --- Parser/Basic.lean | 86 ++++++++++++++++++++++++------------------- Parser/Iterators.lean | 51 ++++++++++++------------- Parser/Parser.lean | 38 ++++++++++--------- Parser/Stream.lean | 60 ++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 84 deletions(-) diff --git a/Parser/Basic.lean b/Parser/Basic.lean index 377f1b68..030e82d9 100644 --- a/Parser/Basic.lean +++ b/Parser/Basic.lean @@ -128,20 +128,22 @@ def test (p : ParserT ε σ τ m α) : ParserT ε σ τ m Bool := /-! ### `foldr` -/ /-- `foldr f p q` folds `f` from right to left, parsing `p` repeatedly until it fails, then -finishing with `q`. Uses `Stream.remaining` as fuel to ensure termination. -/ +finishing with `q`. Terminates via well-founded recursion on `Stream.remaining`. -/ @[inline] def foldr (f : α → β → β) (p : ParserT ε σ τ m α) (q : ParserT ε σ τ m β) : ParserT ε σ τ m β := do - let fuel := Stream.remaining (← getStream) - go fuel + go (← getStream) where - go : Nat → ParserT ε σ τ m β - | 0 => q - | n + 1 => try - let x ← withBacktracking p - let y ← go n - return f x y - catch _ => q + go (s₀ : σ) : ParserT ε σ τ m β := do + try + let x ← withBacktracking p + let s₁ ← getStream + if _h : Stream.remaining s₁ < Stream.remaining s₀ then + return f x (← go s₁) + else + return f x (← q) + catch _ => q + termination_by Stream.remaining s₀ /-! ### `take` family -/ @@ -203,17 +205,19 @@ the error from `p` is reported and no input is consumed. def takeUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α × β) := withBacktracking do - let fuel := Stream.remaining (← getStream) - rest fuel #[] + rest (← getStream) #[] where - rest : Nat → Array α → ParserT ε σ τ m (Array α × β) - | 0, acc => do - let y ← stop - return (acc, y) - | n + 1, acc => do - match ← option? stop with - | some y => return (acc, y) - | none => rest n <| acc.push (← p) + rest (s₀ : σ) (acc : Array α) : ParserT ε σ τ m (Array α × β) := do + match ← option? stop with + | some y => return (acc, y) + | none => + let x ← p + let s₁ ← getStream + if _h : Stream.remaining s₁ < Stream.remaining s₀ then + rest s₁ (acc.push x) + else + return (acc.push x, ← stop) + termination_by Stream.remaining s₀ /-! ### `drop` family -/ @@ -269,15 +273,19 @@ and no input is consumed. -/ def dropUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := withBacktracking do - let fuel := Stream.remaining (← getStream) - loop fuel + loop (← getStream) where - loop : Nat → ParserT ε σ τ m β - | 0 => stop - | n + 1 => do - match ← option? stop with - | some s => return s - | none => p *> loop n + loop (s₀ : σ) : ParserT ε σ τ m β := do + match ← option? stop with + | some y => return y + | none => + let _ ← p + let s₁ ← getStream + if _h : Stream.remaining s₁ < Stream.remaining s₀ then + loop s₁ + else + stop + termination_by Stream.remaining s₀ /-! `count` family -/ @@ -312,17 +320,19 @@ error from `p` is reported and no input is consumed. def countUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Nat × β) := withBacktracking do - let fuel := Stream.remaining (← getStream) - loop fuel 0 + loop (← getStream) 0 where - loop : Nat → Nat → ParserT ε σ τ m (Nat × β) - | 0, ct => do - let s ← stop - return (ct, s) - | n + 1, ct => do - match ← option? stop with - | some s => return (ct, s) - | none => p *> loop n (ct+1) + loop (s₀ : σ) (ct : Nat) : ParserT ε σ τ m (Nat × β) := do + match ← option? stop with + | some y => return (ct, y) + | none => + let _ ← p + let s₁ ← getStream + if h : Stream.remaining s₁ < Stream.remaining s₀ then + loop s₁ (ct + 1) + else + return (ct + 1, ← stop) + termination_by Stream.remaining s₀ /-! ### `endBy` family -/ diff --git a/Parser/Iterators.lean b/Parser/Iterators.lean index 46a0b576..a75a4d06 100644 --- a/Parser/Iterators.lean +++ b/Parser/Iterators.lean @@ -17,22 +17,22 @@ Each `Parser.Stream σ τ` provides: - A `Std.Stream σ τ` with `next? : σ → Option (τ × σ)` for consuming tokens - `remaining : σ → Nat`, an upper bound that strictly decreases on each `next?` yielding `some` -We define a wrapper `StreamIterator σ τ` that carries a `Parser.Stream` state and provides: -- `Iterator (StreamIterator σ τ) Id τ` — steps via `next?`, never skips -- `Finite (StreamIterator σ τ) Id` — well-founded via `remaining` - -This enables using `Std.Data.Iterators` consumers (e.g., `fold`, `toList`, `toArray`) on -parser streams, and provides the foundation for future migration of lean4-parser's fold -combinators to use `Std.Iterators` infrastructure directly. +We define `StreamIterator σ τ` wrapper that provides: +- `Iterator (StreamIterator σ τ) Id τ` — steps via `next?`, never skips (for all `Parser.Stream`) +- `Finite (StreamIterator σ τ) Id` — well-founded via `remaining` (requires `LawfulParserStream`) +- `IteratorLoop` — enables `for` loops over stream tokens (requires `LawfulParserStream`) ## Usage ```lean import Parser.Iterators --- Given a Parser.Stream instance for String.Slice: -def example (s : String.Slice) : List Char := - (StreamIterator.mk s).iter.toList +-- Given a LawfulParserStream instance (e.g., Subarray, OfList): +def collectTokens [Parser.Stream σ τ] [LawfulParserStream σ τ] (s : σ) : Array τ := Id.run do + let mut acc := #[] + for tok in (StreamIterator.mk s).iter do + acc := acc.push tok + return acc ``` -/ @@ -67,10 +67,10 @@ def StreamIterator.iter (s : StreamIterator σ τ) : Iter (α := StreamIterator s.iterM.toIter /-- -Helper predicate for the `Iterator` instance. Defined as a standalone function so that +Predicate for the `Iterator` instance. Defined as a standalone function so that `simp` and `unfold` can reduce it when the `IterStep` constructor is known. -/ -private def isPlausibleStreamStep +def isPlausibleStreamStep (it : IterM (α := StreamIterator σ τ) Id τ) (step : IterStep (IterM (α := StreamIterator σ τ) Id τ) τ) : Prop := match step with @@ -105,19 +105,18 @@ instance instIterator : Iterator (StreamIterator σ τ) Id τ where exact h⟩ /-- -`Finite` instance for `StreamIterator`, proven via `Parser.Stream.remaining`. +`Finite` instance for `StreamIterator`, proven via `LawfulParserStream.remaining_decreases`. The `remaining` field of `Parser.Stream` provides an upper bound on tokens that strictly decreases when `next?` returns `some`. We use `remaining ∘ StreamIterator.stream` as the well-founded measure via a `FinitenessRelation`. -**Note**: This proof currently uses `sorry` for the key lemma that `remaining` strictly -decreases on each `next?` step. A complete proof requires adding a contract/axiom to -`Parser.Stream` stating: - `∀ s tok s', next? s = some (tok, s') → remaining s' < remaining s` -This is semantically required by all `Parser.Stream` instances but not yet enforced by the type. +Requires `LawfulParserStream σ τ` to provide the proof that `remaining` strictly decreases. +Types without a `LawfulParserStream` instance (e.g., `mkDefault`) still get the `Iterator` +instance above, but not `Finite` — they cannot prove termination. -/ -private def streamFinitenessRelation : FinitenessRelation (StreamIterator σ τ) Id where +def streamFinitenessRelation [LawfulParserStream σ τ] : + FinitenessRelation (StreamIterator σ τ) Id where Rel := InvImage WellFoundedRelation.rel (Parser.Stream.remaining ∘ StreamIterator.stream ∘ IterM.internalState) wf := InvImage.wf _ WellFoundedRelation.wf @@ -128,28 +127,24 @@ private def streamFinitenessRelation : FinitenessRelation (StreamIterator σ τ) simp [IterStep.successor] at hsucc subst hsucc simp only [IterM.IsPlausibleStep, Iterator.IsPlausibleStep, isPlausibleStreamStep] at hplaus - -- hplaus : Stream.next? it.internalState.stream = some (out, it'.internalState.stream) - -- Need: remaining it'.internalState.stream < remaining it.internalState.stream - -- This follows from the Parser.Stream contract that remaining strictly decreases - -- when next? returns some, but that contract is not yet formalized as a typeclass law. - sorry + exact LawfulParserStream.remaining_decreases _ _ _ hplaus | skip it'' => simp [IterStep.successor] at hsucc subst hsucc - -- IsPlausibleStep for skip is False simp only [IterM.IsPlausibleStep, Iterator.IsPlausibleStep, isPlausibleStreamStep] at hplaus | done => simp [IterStep.successor] at hsucc -instance : Iterators.Finite (StreamIterator σ τ) Id := +instance [LawfulParserStream σ τ] : Iterators.Finite (StreamIterator σ τ) Id := Iterators.Finite.of_finitenessRelation streamFinitenessRelation /-- `IteratorLoop` instance enabling `for` loops and standard consumers (`fold`, `toList`, etc.) -over `StreamIterator`. +over `StreamIterator`. Requires `LawfulParserStream` for the `Finite` proof. -/ @[always_inline, inline] -instance {n : Type → Type} [Monad n] : IteratorLoop (StreamIterator σ τ) Id n := +instance [LawfulParserStream σ τ] {n : Type → Type} [Monad n] : + IteratorLoop (StreamIterator σ τ) Id n := .defaultImplementation end Parser.Stream diff --git a/Parser/Parser.lean b/Parser/Parser.lean index 04752826..49d66c0f 100644 --- a/Parser/Parser.lean +++ b/Parser/Parser.lean @@ -186,28 +186,30 @@ def withErrorMessage (msg : String) (p : ParserT ε σ τ m α) : ParserT ε σ /-! ### `foldl` family -/ /-- -Core loop for `efoldlP`. Terminates structurally on `fuel` (initially `Stream.remaining s`). +Core loop for `efoldlP`. Terminates via well-founded recursion on `Stream.remaining s`. -When `p` succeeds without consuming input (i.e., `Stream.remaining` does not decrease), the loop -stops and returns as if `p` had failed. This prevents infinite loops from non-consuming parsers -while preserving the original semantics for well-behaved (consuming) parsers. +After each successful `p >>= f` step, we check whether `Stream.remaining` decreased. If not +(the parser succeeded without consuming input), the loop stops to prevent infinite iteration. +This matches the original fuel-based semantics but uses `remaining` directly as the termination +measure rather than a separate fuel parameter. -/ @[specialize] private def efoldlPAux [Inhabited ε] [Inhabited σ] [Inhabited β] - (f : β → α → ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (y : β) (s : σ) - (fuel : Nat := Stream.remaining s) : - m (Parser.Result ε σ (β × ε × Bool)) := - match fuel with - | 0 => - -- Out of fuel: treat as if p failed (no input consumed). - return .ok s (y, Error.unexpected (Stream.getPosition s) none, false) - | fuel' + 1 => - let savePos := Stream.getPosition s - p s >>= fun - | .ok s x => f y x s >>= fun - | .ok s y => efoldlPAux f p y s (min fuel' (Stream.remaining s)) - | .error s e => return .ok (Stream.setPosition s savePos) (y, e, true) - | .error s e => return .ok (Stream.setPosition s savePos) (y, e, false) + (f : β → α → ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (y : β) (s : σ) : + m (Parser.Result ε σ (β × ε × Bool)) := do + let savePos := Stream.getPosition s + match ← p s with + | .ok s' x => + match ← f y x s' with + | .ok s'' y' => + if _h : Stream.remaining s'' < Stream.remaining s then + efoldlPAux f p y' s'' + else + -- Parser didn't consume; stop to avoid infinite loop. + return .ok s'' (y', Error.unexpected (Stream.getPosition s'') none, false) + | .error s'' e => return .ok (Stream.setPosition s'' savePos) (y, e, true) + | .error s' e => return .ok (Stream.setPosition s' savePos) (y, e, false) +termination_by Stream.remaining s /-- `foldlP f init p` folds the parser function `f` from left to right using `init` as an intitial diff --git a/Parser/Stream.lean b/Parser/Stream.lean index 75d2c0ae..fbc601f1 100644 --- a/Parser/Stream.lean +++ b/Parser/Stream.lean @@ -46,6 +46,20 @@ protected class Parser.Stream (σ : Type _) (τ : outParam (Type _)) extends Std attribute [reducible, inherit_doc Parser.Stream] Parser.Stream.Position attribute [inherit_doc Parser.Stream] Parser.Stream.getPosition Parser.Stream.setPosition +/-- Lawful parser stream: `remaining` strictly decreases when `next?` consumes a token. + +This law formalizes the contract that `Parser.Stream.remaining` provides a well-founded termination +measure. Instances of `LawfulParserStream` are required by the sorry-free `Finite` instance for +`StreamIterator`, enabling provably terminating iteration via `Std.Data.Iterators`. + +The `mkDefault` fallback does not satisfy this law (its `remaining` uses a `partial def`), +so there is intentionally no `LawfulParserStream` instance for `mkDefault`. +-/ +class LawfulParserStream (σ : Type _) (τ : outParam (Type _)) [Parser.Stream σ τ] : Prop where + /-- `remaining` strictly decreases when `next?` returns `some`. -/ + remaining_decreases : ∀ (s : σ) (tok : τ) (s' : σ), + Stream.next? s = some (tok, s') → Parser.Stream.remaining s' < Parser.Stream.remaining s + namespace Parser.Stream /-- Stream segment type. -/ @@ -86,6 +100,13 @@ instance : Parser.Stream String.Slice Char where setPosition _ s := s remaining s := s.endExclusive.offset.byteIdx - s.startInclusive.offset.byteIdx +/-- TODO: prove via `Slice.Pos.lt_next` and `Slice.sliceFrom` lemmas once the +right `simp` set for String.Slice byte-index arithmetic is identified. -/ +instance : LawfulParserStream String.Slice Char where + remaining_decreases _ _ _ _ := by + simp only [Parser.Stream.remaining] + sorry + @[reducible] instance : Parser.Stream Substring.Raw Char where Position := String.Pos.Raw @@ -97,6 +118,13 @@ instance : Parser.Stream Substring.Raw Char where { s with startPos := s.stopPos } remaining s := s.stopPos.byteIdx - s.startPos.byteIdx +/-- TODO: prove via `String.Pos.Raw.lt_next` once the right `simp` set for +Substring.Raw byte-index arithmetic is identified. -/ +instance : LawfulParserStream Substring.Raw Char where + remaining_decreases _ _ _ _ := by + simp only [Parser.Stream.remaining] + sorry + @[reducible] instance (τ) : Parser.Stream (Subarray τ) τ where Position := Nat @@ -108,6 +136,19 @@ instance (τ) : Parser.Stream (Subarray τ) τ where ⟨{ s.internalRepresentation with start := s.stop, start_le_stop := Nat.le_refl s.stop }⟩ remaining s := s.stop - s.start +instance (τ) : LawfulParserStream (Subarray τ) τ where + remaining_decreases s tok s' h := by + simp only [Parser.Stream.remaining] + dsimp [Stream.next?, Std.Stream.next?] at h + split at h + · next hlt => + obtain ⟨_, rfl⟩ := Option.some.inj h + simp only [Subarray.start, Subarray.stop] + have h1 : s.start = s.internalRepresentation.start := rfl + have h2 : s.stop = s.internalRepresentation.stop := rfl + omega + · nomatch h + @[reducible] instance : Parser.Stream ByteSlice UInt8 where Position := Nat @@ -115,6 +156,13 @@ instance : Parser.Stream ByteSlice UInt8 where setPosition s p := s.slice p remaining s := s.stop - s.start +instance : LawfulParserStream ByteSlice UInt8 where + remaining_decreases s tok s' h := by + simp only [Parser.Stream.remaining] + -- Stream.next? for ByteSlice: s[0]? >>= (·, s.popFront) + -- popFront advances start by 1, so s'.stop - s'.start < s.stop - s.start + sorry + /-- `OfList` is a view of a list stream that keeps track of consumed tokens. -/ structure OfList (τ : Type _) where /-- Remaining tokens. -/ @@ -153,4 +201,16 @@ instance (τ) : Parser.Stream (OfList τ) τ where | ⟨x :: rest, past⟩ => some (x, ⟨rest, x :: past⟩) | _ => none +instance (τ) : LawfulParserStream (OfList τ) τ where + remaining_decreases s tok s' h := by + simp only [Parser.Stream.remaining] + match s, h with + | ⟨_ :: _, _⟩, h => + dsimp [Stream.next?, Std.Stream.next?] at h + obtain ⟨_, rfl⟩ := Option.some.inj h + simp [List.length_cons] + | ⟨[], _⟩, h => + dsimp [Stream.next?, Std.Stream.next?] at h + nomatch h + end Parser.Stream From 8cfc6ac93d436f145f767a58b8e22cf540f94795 Mon Sep 17 00:00:00 2001 From: Nicolas Rouquette Date: Wed, 25 Feb 2026 21:34:08 -0800 Subject: [PATCH 3/3] =?UTF-8?q?feat:=20add=20fold-combinator=20specificati?= =?UTF-8?q?on=20lemmas=20for=20m=3DId\n\nAdd=20upstream=20lemmas=20in=20le?= =?UTF-8?q?an4-parser=20that=20make=20the=20fold-based\nparser=20combinato?= =?UTF-8?q?rs=20deductively=20transparent=20for=20proofs:\n\n-=20efoldlPAu?= =?UTF-8?q?x=5Feq:=20one-step=20unfolding=20of=20the=20private=20WF-recurs?= =?UTF-8?q?ive=20loop\n-=20efoldlPAux=5Finhabited=5Firrel:=20Inhabited=20i?= =?UTF-8?q?nstances=20don't=20affect=20computation\n-=20foldl=5Feq:=20one-?= =?UTF-8?q?step=20recursive=20equation=20for=20foldl=20(m=3DId)\n-=20dropM?= =?UTF-8?q?any=5Feq:=20one-step=20recursive=20equation=20for=20dropMany\n-?= =?UTF-8?q?=20count=5Feq:=20one-step=20equation=20for=20count\n\nThese=20l?= =?UTF-8?q?emmas=20close=20the=20fold-combinator=20gap=20(=C2=A74b)=20that?= =?UTF-8?q?=20blocked\nproofs=20of=20literalScalar/foldedScalar=20YAML=20s?= =?UTF-8?q?pec=20theorems."?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Parser/Basic.lean | 40 +++++++++++++++ Parser/Parser.lean | 119 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) diff --git a/Parser/Basic.lean b/Parser/Basic.lean index 030e82d9..ecf969d0 100644 --- a/Parser/Basic.lean +++ b/Parser/Basic.lean @@ -251,6 +251,26 @@ all outputs. def dropMany (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := foldl (Function.const α) .unit p +/-- +One-step unfolding of `dropMany` for `m = Id`. + +Applies `foldl_eq` with `f = Function.const α` and `init = PUnit.unit`. +Since `Function.const α PUnit.unit x = PUnit.unit` for all `x`, the +recursive call is simply `dropMany p s'`. +-/ +theorem dropMany_eq (p : Parser ε σ τ α) (s : σ) : + (dropMany p : Parser ε σ τ _) s = + match p s with + | .ok s' _ => + if _h : Stream.remaining s' < Stream.remaining s then + (dropMany p : Parser ε σ τ _) s' + else .ok s' PUnit.unit + | .error s' _ => + .ok (Stream.setPosition s' (Stream.getPosition s)) PUnit.unit := by + show (foldl (Function.const α) PUnit.unit p) s = _ + rw [foldl_eq]; simp only [Function.const] + cases hp : p s <;> rfl + /-- `dropMany1 p` parses one or more occurrences of `p` (with backtracking) until it fails, ignoring all outputs. @@ -297,6 +317,26 @@ successes. def count (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat := foldl (fun n _ => n+1) 0 p +/-- +One-step unfolding of `count` for `m = Id`. + +Applies `foldl_eq` with `f = fun n _ => n+1` and `init = 0`. +After one successful parse, the accumulator is 1, so the recursive call +uses `foldl (fun n _ => n+1) 1 p`. +-/ +theorem count_eq (p : Parser ε σ τ α) (s : σ) : + (count p : Parser ε σ τ _) s = + match p s with + | .ok s' _ => + if _h : Stream.remaining s' < Stream.remaining s then + (foldl (fun n (_ : α) => n+1) 1 p : Parser ε σ τ _) s' + else .ok s' 1 + | .error s' _ => + .ok (Stream.setPosition s' (Stream.getPosition s)) 0 := by + show (foldl (fun n (_ : α) => n + 1) 0 p) s = _ + rw [foldl_eq]; simp only [Nat.zero_add] + cases hp : p s <;> rfl + /-- `countUpTo n p` parses up to `n` occurrences of `p` until it fails, and returns the count of successes. This parser never fails. diff --git a/Parser/Parser.lean b/Parser/Parser.lean index 49d66c0f..cd5efbc6 100644 --- a/Parser/Parser.lean +++ b/Parser/Parser.lean @@ -211,6 +211,48 @@ private def efoldlPAux [Inhabited ε] [Inhabited σ] [Inhabited β] | .error s' e => return .ok (Stream.setPosition s' savePos) (y, e, false) termination_by Stream.remaining s +/-! ### `efoldlPAux` one-step equation (m = Id) + +For the identity monad, `efoldlPAux` reduces to a direct case split on +`p s` and `f y x s'`. This one-step equation is the foundation for +downstream `@[simp]` lemmas for `foldl`, `dropMany`, and `count`. +-/ + +/-- +One-step unfolding of `efoldlPAux` for `m = Id`. + +This expresses the recursive function as a case split: +- If `p s` fails → return accumulator, backtrack position, error from `p`. +- If `p s` succeeds with `(s', x)` and `f y x s'` fails → return accumulator, + backtrack position, error from `f`. +- If both succeed with stream `s''` and `remaining s'' < remaining s` → recurse. +- If both succeed but stream didn't advance → stop, return accumulator. +-/ +private theorem efoldlPAux_eq [Inhabited ε] [Inhabited σ] [Inhabited β] + (f : β → α → Parser ε σ τ β) (p : Parser ε σ τ α) (y : β) (s : σ) : + efoldlPAux (m := Id) f p y s = + match p s with + | .ok s' x => + match f y x s' with + | .ok s'' y' => + if _h : Stream.remaining s'' < Stream.remaining s then + efoldlPAux (m := Id) f p y' s'' + else + .ok s'' (y', Error.unexpected (Stream.getPosition s'') none, false) + | .error s'' e => + .ok (Stream.setPosition s'' (Stream.getPosition s)) (y, e, true) + | .error s' e => + .ok (Stream.setPosition s' (Stream.getPosition s)) (y, e, false) := by + rw [efoldlPAux] + simp only [bind, Bind.bind, pure, Pure.pure] + cases hp : p s with + | ok s' x => + simp only [] + cases hf : f y x s' with + | ok s'' y' => simp only [] + | error s'' e => rfl + | error s' e => rfl + /-- `foldlP f init p` folds the parser function `f` from left to right using `init` as an intitial value and the parser `p` to generate inputs of type `α`. The folding ends as soon as the update @@ -275,6 +317,83 @@ This parser never fails. def foldl (f : β → α → β) (init : β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := Prod.fst <$> efoldl f init p +/-! ### Fold-combinator specification lemmas (m = Id) + +One-step unfolding lemmas that make the fold combinators +deductively transparent for proofs. All are specialized to +`m = Id` (the `Parser` abbreviation), since the monadic plumbing +reduces to identity / function application for `Id`. +-/ + +/-- +`efoldlPAux` is independent of the `Inhabited` instances. + +The `[Inhabited ε] [Inhabited σ] [Inhabited β]` arguments are required +by well-founded recursion infrastructure but do not appear in the +function body. This lemma allows changing instances freely. +-/ +private theorem efoldlPAux_inhabited_irrel + (iε : Inhabited ε) (iσ : Inhabited σ) (iβ : Inhabited β) + (jε : Inhabited ε) (jσ : Inhabited σ) (jβ : Inhabited β) + (f : β → α → Parser ε σ τ β) (p : Parser ε σ τ α) (y : β) (s : σ) : + @efoldlPAux _ Id _ _ _ _ _ _ _ iε iσ iβ f p y s = + @efoldlPAux _ Id _ _ _ _ _ _ _ jε jσ jβ f p y s := by + rw [@efoldlPAux_eq _ _ _ _ _ _ _ iε iσ iβ f p y s, + @efoldlPAux_eq _ _ _ _ _ _ _ jε jσ jβ f p y s] + split -- match p s + · split -- match f y _ _ + · split -- if remaining < remaining + · exact efoldlPAux_inhabited_irrel iε iσ iβ jε jσ jβ f p _ _ + · rfl + · rfl + · rfl +termination_by Stream.remaining s + +/-- +One-step unfolding of `foldl` for `m = Id`. + +`foldl f init p` applied to stream `s` performs one step: +- If `p s` succeeds with `(s', x)` and `remaining s' < remaining s`, + it recurses with `foldl f (f init x) p s'`. +- If `p s` succeeds but stream didn't advance, returns `f init x`. +- If `p s` fails, returns `init` with position restored. +-/ +theorem foldl_eq (f : β → α → β) (init : β) (p : Parser ε σ τ α) (s : σ) : + (foldl f init p : Parser ε σ τ _) s = + match p s with + | .ok s' x => + if _h : Stream.remaining s' < Stream.remaining s then + (foldl f (f init x) p : Parser ε σ τ _) s' + else .ok s' (f init x) + | .error s' _ => + .ok (Stream.setPosition s' (Stream.getPosition s)) init := by + -- Unfold the full foldl chain to efoldlPAux on both sides + simp only [foldl, efoldl, efoldlM, efoldlP, + Functor.map, bind, Bind.bind, pure, Pure.pure, + monadLift, MonadLift.monadLift] + -- Unfold efoldlPAux one step with explicit Inhabited instances + rw [@efoldlPAux_eq _ _ _ _ _ _ _ + ⟨Error.unexpected (Stream.getPosition s) none⟩ ⟨s⟩ ⟨init⟩] + -- Case-split on p s + cases hp : p s with + | ok s' x => + -- Reduce inner matches: F init x s' = .ok s' (f init x), etc. + simp only [] + -- Split on the consuming condition + by_cases h : Stream.remaining s' < Stream.remaining s + · -- Consuming: reduce dite on both sides + simp only [dif_pos h] + -- Both sides: match chain wrapping efoldlPAux with different Inhabited + congr 1; congr 1 + exact efoldlPAux_inhabited_irrel + ⟨Error.unexpected (Stream.getPosition s) none⟩ ⟨s⟩ ⟨init⟩ + ⟨Error.unexpected (Stream.getPosition s') none⟩ ⟨s'⟩ ⟨f init x⟩ + (fun y x s => .ok s (f y x)) p (f init x) s' + · -- Not consuming + simp only [dif_neg h] + | error s' e => + simp only [] + /-! ### `option` family -/ /--