Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Parser.lean
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Released under Apache 2.0 license as described in the file LICENSE.
import Parser.Basic
import Parser.Char
import Parser.Error
import Parser.Iterators
import Parser.Parser
import Parser.Prelude
import Parser.RegEx
Expand Down
122 changes: 96 additions & 26 deletions Parser/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,23 @@ def test (p : ParserT ε σ τ m α) : ParserT ε σ τ m Bool :=

/-! ### `foldr` -/

/--
`foldr f p q` folds `f` from right to left: it parses `p` (with backtracking) repeatedly until it
fails, then finishes with `q`, combining the outputs as `f x₁ (f x₂ (… (f xₙ y)))` where `y` is
the output of `q`.

Terminates via well-founded recursion on `Stream.remaining`: the helper `go` only recurses when a
successful run of `p` strictly decreased `Stream.remaining`. If `p` succeeds without doing so, the
fold stops and `q` is run right away.
-/
@[inline]
def foldr (f : α → β → β) (p : ParserT ε σ τ m α) (q : ParserT ε σ τ m β) :
    ParserT ε σ τ m β := do
  go (← getStream)
where
  /-- Worker for `foldr`; `s₀` is the stream state observed before this iteration runs `p`. -/
  go (s₀ : σ) : ParserT ε σ τ m β := do
    try
      let x ← withBacktracking p
      let s₁ ← getStream
      -- `_h` is the progress witness that justifies the recursive call for `termination_by`.
      if _h : Stream.remaining s₁ < Stream.remaining s₀ then
        return f x (← go s₁)
      else
        -- No measurable progress: stop iterating and close the fold with `q`.
        return f x (← q)
    -- Any error raised inside the `try` (from `p`, the recursive call, or `q`) falls back to `q`.
    catch _ => q
  termination_by Stream.remaining s₀

/-! ### `take` family -/

Expand Down Expand Up @@ -194,15 +202,22 @@ def takeManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array
array of returned values of `p` and the output of `stop`. If `p` fails before `stop` is encountered,
the error from `p` is reported and no input is consumed.
-/
def takeUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) :
    ParserT ε σ τ m (Array α × β) :=
  -- The whole loop runs under `withBacktracking`, seeded with the current stream state.
  withBacktracking do
    rest (← getStream) #[]
where
  -- `s₀` is the stream state at the start of this iteration; `acc` holds the outputs of `p` so far.
  rest (s₀ : σ) (acc : Array α) : ParserT ε σ τ m (Array α × β) := do
    match ← option? stop with
    | some y => return (acc, y)
    | none =>
      let x ← p
      let s₁ ← getStream
      -- `_h` is the progress witness that justifies the recursive call for `termination_by`.
      if _h : Stream.remaining s₁ < Stream.remaining s₀ then
        rest s₁ (acc.push x)
      else
        -- `p` succeeded without decreasing `remaining`: stop looping and require `stop` here.
        return (acc.push x, ← stop)
  termination_by Stream.remaining s₀

/-! ### `drop` family -/

Expand Down Expand Up @@ -236,6 +251,26 @@ all outputs.
def dropMany (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit :=
  -- `Function.const α` hands back the accumulator unchanged, so every output of `p` is discarded
  -- and the fold result stays `PUnit.unit`.
  foldl (Function.const α) .unit p

/--
One-step unfolding of `dropMany` for `m = Id`.

Running `dropMany p` on `s` is described by a single run of `p` on `s`:
- `p` succeeds with state `s'` and `Stream.remaining` strictly decreased: continue with
  `dropMany p` on `s'`;
- `p` succeeds with state `s'` but `Stream.remaining` did not decrease: stop with `.ok s'`;
- `p` fails with state `s'`: succeed, with the position of `s'` reset to the position of `s`.

Applies `foldl_eq` with `f = Function.const α` and `init = PUnit.unit`.
Since `Function.const α PUnit.unit x = PUnit.unit` for all `x`, the
recursive call is simply `dropMany p s'`.
-/
theorem dropMany_eq (p : Parser ε σ τ α) (s : σ) :
    (dropMany p : Parser ε σ τ _) s =
      match p s with
      | .ok s' _ =>
        if _h : Stream.remaining s' < Stream.remaining s then
          (dropMany p : Parser ε σ τ _) s'
        else .ok s' PUnit.unit
      | .error s' _ =>
        .ok (Stream.setPosition s' (Stream.getPosition s)) PUnit.unit := by
  -- Restate the goal with `dropMany` replaced by its defining `foldl` expression.
  show (foldl (Function.const α) PUnit.unit p) s = _
  -- Unfold one step of `foldl`, then reduce the constant-function accumulator.
  -- NOTE(review): `foldl_eq` is stated elsewhere in the file; its exact form is not visible here.
  rw [foldl_eq]; simp only [Function.const]
  -- Both outcomes of `p s` now hold by reflexivity.
  cases hp : p s <;> rfl

/--
`dropMany1 p` parses one or more occurrences of `p` (with backtracking) until it fails, ignoring
all outputs.
Expand All @@ -256,13 +291,21 @@ def dropManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit :
outputs from `p`. If `p` fails before encountering `stop` then the error from `p` is reported
and no input is consumed.
-/
def dropUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β :=
  -- The whole loop runs under `withBacktracking`, seeded with the current stream state.
  withBacktracking do
    loop (← getStream)
where
  -- `s₀` is the stream state at the start of this iteration.
  loop (s₀ : σ) : ParserT ε σ τ m β := do
    match ← option? stop with
    | some y => return y
    | none =>
      -- The output of `p` is discarded; only its effect on the stream matters.
      let _ ← p
      let s₁ ← getStream
      -- `_h` is the progress witness that justifies the recursive call for `termination_by`.
      if _h : Stream.remaining s₁ < Stream.remaining s₀ then
        loop s₁
      else
        -- `p` succeeded without decreasing `remaining`: stop looping and require `stop` here.
        stop
  termination_by Stream.remaining s₀

/-! `count` family -/

Expand All @@ -271,9 +314,29 @@ where
successes.
-/
@[inline]
def count (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat :=
  -- Left fold that ignores each parsed value and bumps the counter, starting from 0.
  foldl (fun n _ => n+1) 0 p

/--
One-step unfolding of `count` for `m = Id`.

Running `count p` on `s` is described by a single run of `p` on `s`:
- `p` succeeds with state `s'` and `Stream.remaining` strictly decreased: continue folding on
  `s'` with the counter already at `1`;
- `p` succeeds with state `s'` but `Stream.remaining` did not decrease: stop with count `1`;
- `p` fails with state `s'`: succeed with count `0`, with the position of `s'` reset to the
  position of `s`.

Applies `foldl_eq` with `f = fun n _ => n+1` and `init = 0`.
After one successful parse, the accumulator is 1, so the recursive call
uses `foldl (fun n _ => n+1) 1 p`.
-/
theorem count_eq (p : Parser ε σ τ α) (s : σ) :
    (count p : Parser ε σ τ _) s =
      match p s with
      | .ok s' _ =>
        if _h : Stream.remaining s' < Stream.remaining s then
          (foldl (fun n (_ : α) => n+1) 1 p : Parser ε σ τ _) s'
        else .ok s' 1
      | .error s' _ =>
        .ok (Stream.setPosition s' (Stream.getPosition s)) 0 := by
  -- Restate the goal with `count` replaced by its defining `foldl` expression.
  show (foldl (fun n (_ : α) => n + 1) 0 p) s = _
  -- Unfold one step of `foldl`, then simplify `0 + 1` to `1`.
  -- NOTE(review): `foldl_eq` is stated elsewhere in the file; its exact form is not visible here.
  rw [foldl_eq]; simp only [Nat.zero_add]
  -- Both outcomes of `p s` now hold by reflexivity.
  cases hp : p s <;> rfl

/--
`countUpTo n p` parses up to `n` occurrences of `p` until it fails, and returns the count of
successes. This parser never fails.
Expand All @@ -294,15 +357,22 @@ where
the count of successes and the output of `stop`. If `p` fails before encountering `stop` then the
error from `p` is reported and no input is consumed.
-/
def countUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) :
    ParserT ε σ τ m (Nat × β) :=
  -- The whole loop runs under `withBacktracking`, seeded with the current stream state.
  withBacktracking do
    loop (← getStream) 0
where
  -- `s₀` is the stream state at the start of this iteration; `ct` counts successes of `p` so far.
  loop (s₀ : σ) (ct : Nat) : ParserT ε σ τ m (Nat × β) := do
    match ← option? stop with
    | some y => return (ct, y)
    | none =>
      -- The output of `p` is discarded; only the success is counted.
      let _ ← p
      let s₁ ← getStream
      -- `_h` is the progress witness that justifies the recursive call for `termination_by`.
      if _h : Stream.remaining s₁ < Stream.remaining s₀ then
        loop s₁ (ct + 1)
      else
        -- `p` succeeded without decreasing `remaining`: stop looping and require `stop` here.
        return (ct + 1, ← stop)
  termination_by Stream.remaining s₀

/-! ### `endBy` family -/

Expand Down
150 changes: 150 additions & 0 deletions Parser/Iterators.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/-
Copyright © 2026 Nicolas Rouquette. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
-/

import Parser.Stream
import Std.Data.Iterators

/-! # Std.Iterators bridge for Parser.Stream

This module provides `Iterator` and `Finite` instances for `Parser.Stream` types, bridging the
lean4-parser stream abstraction to the `Std.Data.Iterators` framework.

## Design

Each `Parser.Stream σ τ` provides:
- A `Std.Stream σ τ` with `next? : σ → Option (τ × σ)` for consuming tokens
- `remaining : σ → Nat`, an upper bound on the number of tokens left; for a `LawfulParserStream`
  it strictly decreases whenever `next?` yields `some`

We define a `StreamIterator σ τ` wrapper that provides:
- `Iterator (StreamIterator σ τ) Id τ` — steps via `next?`, never skips (for all `Parser.Stream`)
- `Finite (StreamIterator σ τ) Id` — well-founded via `remaining` (requires `LawfulParserStream`)
- `IteratorLoop` — enables `for` loops over stream tokens (requires `LawfulParserStream`)

## Usage

```lean
import Parser.Iterators

-- Given a LawfulParserStream instance (e.g., Subarray, OfList):
def collectTokens [Parser.Stream σ τ] [LawfulParserStream σ τ] (s : σ) : Array τ := Id.run do
let mut acc := #[]
for tok in (StreamIterator.mk s).iter do
acc := acc.push tok
return acc
```
-/

open Std Std.Iterators

namespace Parser.Stream

/--
Wrapper that presents a `Parser.Stream` state as a `Std.Iterators` iterator state.

The iterator yields tokens of type `τ` by calling `next?` on the underlying stream.
It terminates when `next?` returns `none`.

The token type `τ` does not occur in the single field; it is carried as a type parameter so that
instances declared for `StreamIterator σ τ` know which token type the iterator produces.
-/
structure StreamIterator (σ : Type) (τ : Type) [Parser.Stream σ τ] where
  /-- The underlying parser stream state. -/
  stream : σ

variable {σ τ : Type} [Parser.Stream σ τ]

/-- Wrap the parser stream state `s` in a `StreamIterator`. -/
@[inline]
def StreamIterator.mk' (s : σ) : StreamIterator σ τ :=
  { stream := s }

/-- View a `StreamIterator` as a monadic iterator (`IterM`) over `Id` that produces `τ` values. -/
@[inline]
def StreamIterator.iterM (s : StreamIterator σ τ) : IterM (α := StreamIterator σ τ) Id τ :=
  IterM.mk (α := StreamIterator σ τ) s Id τ

/-- View a `StreamIterator` as a pure iterator (`Iter`) by converting its `iterM` form. -/
@[inline]
def StreamIterator.iter (s : StreamIterator σ τ) : Iter (α := StreamIterator σ τ) τ :=
  IterM.toIter s.iterM

/--
Predicate for the `Iterator` instance. Defined as a standalone function so that
`simp` and `unfold` can reduce it when the `IterStep` constructor is known.

Given the current iterator `it` and a candidate `step`:
- `.yield it' out` is plausible exactly when `next?` on the current stream returns `out` together
  with the stream held by `it'`;
- `.skip _` is never plausible;
- `.done` is plausible exactly when `next?` on the current stream returns `none`.
-/
def isPlausibleStreamStep
    (it : IterM (α := StreamIterator σ τ) Id τ)
    (step : IterStep (IterM (α := StreamIterator σ τ) Id τ) τ) : Prop :=
  match step with
  | .yield it' out =>
    Stream.next? it.internalState.stream = some (out, it'.internalState.stream)
  | .skip _ => False
  | .done => Stream.next? it.internalState.stream = none

/--
`Iterator` instance for `StreamIterator`. Each step calls `next?` on the underlying stream:
- If `next?` returns `some (tok, s')`, yields `tok` and advances to `s'`.
- If `next?` returns `none`, the iterator is done.

The iterator never produces `skip` steps.

The `IsPlausibleStep` predicate ties each step to the actual `next?` result, ensuring that
the plausible successor relation mirrors the stream's token consumption — which is the basis
for the `Finite` proof.
-/
instance instIterator : Iterator (StreamIterator σ τ) Id τ where
  IsPlausibleStep := isPlausibleStreamStep
  step it := pure <|
    -- `h` records which branch `next?` took; it is the evidence for the plausibility proofs below.
    match h : Stream.next? it.internalState.stream with
    | some (tok, s') =>
      .deflate ⟨.yield (IterM.mk (α := StreamIterator σ τ) ⟨s'⟩ Id τ) tok, by
        -- After unfolding and simplifying, the obligation is discharged by `h`.
        unfold isPlausibleStreamStep
        simp
        exact h⟩
    | none =>
      .deflate ⟨.done, by
        -- For `.done` the predicate unfolds to `next? … = none`, which is `h`.
        unfold isPlausibleStreamStep
        exact h⟩

/--
Finiteness relation for `StreamIterator`, proven via `LawfulParserStream.remaining_decreases`.
The `Finite` instance for `StreamIterator` is derived from this definition.

The `remaining` field of `Parser.Stream` provides an upper bound on tokens that strictly
decreases when `next?` returns `some`. We use `remaining ∘ StreamIterator.stream` (composed with
`IterM.internalState`) as the well-founded measure of the `FinitenessRelation`.

Requires `LawfulParserStream σ τ` to provide the proof that `remaining` strictly decreases.
Types without a `LawfulParserStream` instance (e.g., `mkDefault`) still get the `Iterator`
instance, but not `Finite` — they cannot prove termination.
-/
def streamFinitenessRelation [LawfulParserStream σ τ] :
    FinitenessRelation (StreamIterator σ τ) Id where
  -- Compare iterators by the `remaining` value of the stream they wrap.
  Rel := InvImage WellFoundedRelation.rel
    (Parser.Stream.remaining ∘ StreamIterator.stream ∘ IterM.internalState)
  wf := InvImage.wf _ WellFoundedRelation.wf
  subrelation {it it'} h := by
    -- `h` packages a step, the fact that `it` is its successor, and the step's plausibility.
    obtain ⟨step, hsucc, hplaus⟩ := h
    cases step with
    | yield it'' out =>
      -- The successor of a `yield` step is `it''`; identify it with `it`.
      simp [IterStep.successor] at hsucc
      subst hsucc
      -- Plausibility unfolds to the `next?` equation, which is what `remaining_decreases` needs.
      simp only [IterM.IsPlausibleStep, Iterator.IsPlausibleStep, isPlausibleStreamStep] at hplaus
      exact LawfulParserStream.remaining_decreases _ _ _ hplaus
    | skip it'' =>
      simp [IterStep.successor] at hsucc
      subst hsucc
      -- `skip` steps are never plausible: `hplaus` reduces to `False`, closing the goal.
      simp only [IterM.IsPlausibleStep, Iterator.IsPlausibleStep, isPlausibleStreamStep] at hplaus
    | done =>
      -- A `done` step has no successor, so `hsucc` is contradictory.
      simp [IterStep.successor] at hsucc

/--
`Finite` instance for `StreamIterator` over `Id`, built from `streamFinitenessRelation`.
Only available when the stream has a `LawfulParserStream` instance.
-/
instance [LawfulParserStream σ τ] : Iterators.Finite (StreamIterator σ τ) Id :=
  Iterators.Finite.of_finitenessRelation streamFinitenessRelation

/--
`IteratorLoop` instance for `StreamIterator`, using the library's default implementation. It is
what makes `for` loops and the standard consumers (`fold`, `toList`, etc.) available for stream
iterators in any monad `n`. `LawfulParserStream` is required because of the `Finite` proof.
-/
@[always_inline, inline]
instance [LawfulParserStream σ τ] {n : Type → Type} [Monad n] :
    IteratorLoop (StreamIterator σ τ) Id n :=
  IteratorLoop.defaultImplementation

end Parser.Stream
Loading