From 05b8063fb99a9a249783adb8cb1a349c5ddac4d2 Mon Sep 17 00:00:00 2001 From: Nicolas Rouquette Date: Thu, 26 Feb 2026 20:17:01 -0800 Subject: [PATCH 1/2] Add WellFoundedStreams module from batteries PR#1331 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements François Dorais's suggestions from lean4-parser PR#97: - Add WellFoundedStreams as a self-contained component based on batteries PR#1331 (Stream.WellFounded / Stream.Finite) - Architecture: iterators before streams (reverse of std-iterators approach) WellFoundedStreams/Basic.lean: - Stream basics: drop, take with tail-recursive variant and csimp proof - StreamIterator bridge: productive pure iterator for any Std.Stream - Correctness proofs for take/drop operations WellFoundedStreams/Finite.lean: - Stream.WithNextRelation: class defining next relation for a stream type - Stream.WellFounded: well-founded stream type class - Stream.Finite: per-instance finiteness proof - WellFounded instance for List - Total fold combinators: foldlM, foldrM, foldl, foldr - Collection operations: length, toListRev, toList, toArray - Correctness theorems for all operations Uses Std.Stream (non-deprecated) throughout. Iterator bridge section (connecting Stream.WellFounded to Std.Iterators.Finite) deferred pending adaptation to v4.28.0 iterator APIs. --- WellFoundedStreams.lean | 11 ++ WellFoundedStreams/Basic.lean | 187 +++++++++++++++++++ WellFoundedStreams/Finite.lean | 322 +++++++++++++++++++++++++++++++++ lakefile.toml | 3 + 4 files changed, 523 insertions(+) create mode 100644 WellFoundedStreams.lean create mode 100644 WellFoundedStreams/Basic.lean create mode 100644 WellFoundedStreams/Finite.lean diff --git a/WellFoundedStreams.lean b/WellFoundedStreams.lean new file mode 100644 index 00000000..c02f4525 --- /dev/null +++ b/WellFoundedStreams.lean @@ -0,0 +1,11 @@ +/- +Copyright (c) 2025 François G. Dorais. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: François G. Dorais + +Adapted from batteries PR#1331 (https://github.com/leanprover-community/batteries/pull/1331) +for inclusion in lean4-parser as a self-contained component. +-/ + +import WellFoundedStreams.Basic +import WellFoundedStreams.Finite diff --git a/WellFoundedStreams/Basic.lean b/WellFoundedStreams/Basic.lean new file mode 100644 index 00000000..0d92f32c --- /dev/null +++ b/WellFoundedStreams/Basic.lean @@ -0,0 +1,187 @@ +/- +Copyright (c) 2024-2025 François G. Dorais. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: François G. Dorais + +Adapted from batteries PR#1331 (https://github.com/leanprover-community/batteries/pull/1331) +for inclusion in lean4-parser as a self-contained component. +-/ + +import Std.Data.Iterators + +/-! # Stream Basics + +Basic operations on streams: `drop`, `take`, and `StreamIterator` bridging +streams to `Std.Iterators`. +-/ + +namespace Stream + +/-- Drop up to `n` values from the stream `s`. -/ +def drop [Std.Stream σ α] (s : σ) : Nat → σ + | 0 => s + | n+1 => + match next? s with + | none => s + | some (_, s) => drop s n + +/-- Read up to `n` values from the stream `s` as a list from first to last. -/ +def take [Std.Stream σ α] (s : σ) : Nat → List α × σ + | 0 => ([], s) + | n+1 => + match next? s with + | none => ([], s) + | some (a, s) => + match take s n with + | (as, s) => (a :: as, s) + +@[simp] theorem fst_take_zero [Std.Stream σ α] (s : σ) : + (take s 0).fst = [] := rfl + +theorem fst_take_succ [Std.Stream σ α] (s : σ) : + (take s (n+1)).fst = + match next? s with + | none => [] + | some (a, s) => a :: (take s n).fst := by + simp only [take]; split <;> rfl + +@[simp] theorem snd_take_eq_drop [Std.Stream σ α] (s : σ) (n : Nat) : + (take s n).snd = drop s n := by + induction n generalizing s with + | zero => rfl + | succ n ih => + simp only [take, drop] + split <;> simp [ih] + +/-- Tail recursive version of `Stream.take`. -/ +private def takeTR [Std.Stream σ α] (s : σ) (n : Nat) : List α × σ := + loop s [] n +where + /-- Inner loop for `Stream.takeTR`. -/ + loop (s : σ) (acc : List α) + | 0 => (acc.reverse, s) + | n+1 => match next? s with + | none => (acc.reverse, s) + | some (a, s) => loop s (a :: acc) n + +private theorem fst_takeTR_loop [Std.Stream σ α] (s : σ) (acc : List α) (n : Nat) : + (takeTR.loop s acc n).fst = acc.reverseAux (take s n).fst := by + induction n generalizing acc s with + | zero => rfl + | succ n ih => simp only [take, takeTR.loop]; split; rfl; simp [ih] + +private theorem fst_takeTR [Std.Stream σ α] (s : σ) (n : Nat) : (takeTR s n).fst = (take s n).fst := + fst_takeTR_loop .. + +private theorem snd_takeTR_loop [Std.Stream σ α] (s : σ) (acc : List α) (n : Nat) : + (takeTR.loop s acc n).snd = drop s n := by + induction n generalizing acc s with + | zero => rfl + | succ n ih => simp only [takeTR.loop, drop]; split; rfl; simp [ih] + +private theorem snd_takeTR [Std.Stream σ α] (s : σ) (n : Nat) : + (takeTR s n).snd = drop s n := snd_takeTR_loop .. + +@[csimp] private theorem take_eq_takeTR : @take = @takeTR := by + funext; ext : 1; rw [fst_takeTR]; rw [snd_takeTR, snd_take_eq_drop] + +end Stream + +@[simp] +theorem List.stream_drop_eq_drop (l : List α) : Stream.drop l n = l.drop n := by + induction n generalizing l with + | zero => rfl + | succ n ih => + match l with + | [] => rfl + | _::_ => simp [Stream.drop, List.drop_succ_cons, ih] + +@[simp] theorem List.stream_take_eq_take_drop (l : List α) : + Stream.take l n = (l.take n, l.drop n) := by + induction n generalizing l with + | zero => rfl + | succ n ih => + match l with + | [] => rfl + | _::_ => simp [Stream.take, ih] + +/-! ## Stream Iterators + +The standard library provides iterators that behave much like streams but include many additional +features to support monadic effects, among other things. This added functionality makes the user +interface more complicated. By comparison, the stream interface is quite simple and easy to use. + +The standard library provides a stream interface for productive pure iterators. + +The following provide the reverse translation. The function `Stream.iter` converts a `Stream σ α` +into a productive pure iterator of type `StreamIterator σ α`. Finiteness properties for streams are +provided in `WellFoundedStreams.Finite`. + +In addition to providing a back and forth translation between stream types and productive pure +iterators, this functionality is also useful for existing stream-based libraries to incrementally +convert to iterators. +-/ + +/-- The underlying type of a stream iterator. -/ +structure StreamIterator (σ α) [Std.Stream σ α] where + /-- Underlying stream of a stream iterator. -/ + stream : σ + +section StreamIteratorDefs +open Std.Iterators + +/-- +Returns a productive pure iterator for the given stream. + +**Termination properties:** + +* `Finite` instance: maybe available +* `Productive` instance: always +-/ +@[always_inline, inline] +def Stream.iter [Std.Stream σ α] (stream : σ) : Std.Iter (α := StreamIterator σ α) α := + Std.IterM.mk { stream } Id α |>.toIter + +/-- +Predicate for the `Iterator` instance. Defined as a standalone function so that +`simp` and `unfold` can reduce it when the `IterStep` constructor is known. +-/ +def isPlausibleStreamStep [Std.Stream σ α] + (it : Std.IterM (α := StreamIterator σ α) Id α) + (step : Std.IterStep (Std.IterM (α := StreamIterator σ α) Id α) α) : Prop := + match step with + | .yield it' out => + Std.Stream.next? it.internalState.stream = some (out, it'.internalState.stream) + | .skip _ => False + | .done => Std.Stream.next? it.internalState.stream = none + +@[always_inline, inline] +instance (σ α) [Std.Stream σ α] : Std.Iterator (StreamIterator σ α) Id α where + IsPlausibleStep := isPlausibleStreamStep + step it := pure <| + match h : Std.Stream.next? it.internalState.stream with + | some (out, stream) => + .deflate ⟨.yield (Std.IterM.mk (α := StreamIterator σ α) ⟨stream⟩ Id α) out, by + unfold isPlausibleStreamStep + simp + exact h⟩ + | none => + .deflate ⟨.done, by + unfold isPlausibleStreamStep + exact h⟩ + +private def StreamIterator.instProductivenessRelation [Std.Stream σ α] : + ProductivenessRelation (StreamIterator σ α) Id where + Rel := emptyWf.rel + wf := emptyWf.wf + subrelation h := by cases h + +instance StreamIterator.instProductive [Std.Stream σ α] : + Productive (StreamIterator σ α) Id := + Productive.of_productivenessRelation + StreamIterator.instProductivenessRelation + +instance StreamIterator.instIteratorLoop [Std.Stream σ α] [Monad n] : + Std.IteratorLoop (StreamIterator σ α) Id n := .defaultImplementation + +end StreamIteratorDefs diff --git a/WellFoundedStreams/Finite.lean b/WellFoundedStreams/Finite.lean new file mode 100644 index 00000000..b1c6d697 --- /dev/null +++ b/WellFoundedStreams/Finite.lean @@ -0,0 +1,322 @@ +/- +Copyright (c) 2025 François G. Dorais. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: François G. Dorais + +Adapted from batteries PR#1331 (https://github.com/leanprover-community/batteries/pull/1331) +for inclusion in lean4-parser as a self-contained component. +-/ + +import WellFoundedStreams.Basic + +/-! # Finite and Well-Founded Streams + +This module defines two main classes: + +- `Stream.Finite` provides a proof that a specific stream is finite. +- `Stream.WellFounded` extends the `Std.Stream` class and provides an instance of `Stream.Finite` + for every stream of the given type. + +The importance of these classes is that iterating along a finite stream is guaranteed to terminate. +Therefore, it is possible to prove facts about such iterations. + +The `Stream.WellFounded` class is easiest to use: match on `Std.Stream.next?` and provide a +termination hint. The general pattern is this: +``` +def foo [Stream.WellFounded σ α] (s : σ) : β := + match _hint : Std.Stream.next? s with + | none => _ + | some (x, t) => _ +termination_by s +``` + +The `Stream.Finite` class is more general but is slightly trickier to use. +``` +def foo [Std.Stream σ α] (s : σ) [Stream.Finite s] : β := + match hint : Std.Stream.next? s with + | none => _ + | some (x, t) => + have : Stream.Finite t := .ofSome hint + _ +termination_by Stream.Finite.wrap s +``` + +## Connection with `Std.Iterators` + +While iterators from the standard library also provide support for finite iterators, the iterator +interface is much more complex than the stream interface. This is necessary in order to support +monadic effects and other specialized features. A stream interface for finiteness is useful for +simple applications. The standard library already defines `Std.Stream` instances for productive pure +iterators. This module adds `Stream.WellFounded` instances for finite pure iterators. It also adds +instances the other way so that `StreamIterators` from well-founded streams translate to finite pure +iterators. + +This is also useful for existing stream-based libraries that want to gradually transition to +iterators. +-/ + +namespace Stream + +/-- Class to define the next relation of a stream type. -/ +class WithNextRelation (σ : Type _) (α : outParam <| Type _) extends Std.Stream σ α where + /-- Next relation for a stream type `σ`. -/ + rel : σ → σ → Prop -- this is named `rel` to match `WellFoundedRelation` + /-- Next relation extends the default. -/ + rel_of_next?_eq_some : Std.Stream.next? s = some (x, t) → rel t s + +/-- Default next relation for a stream type. -/ +instance (priority := low) defaultNextRelation (σ) [Std.Stream σ α] : WithNextRelation σ α where + rel t s := ∃ x, Std.Stream.next? s = some (x, t) + rel_of_next?_eq_some h := ⟨_, h⟩ + +/-- Next relation for a stream. -/ +abbrev Next [WithNextRelation σ α] : σ → σ → Prop := WithNextRelation.rel + +/-- Next relation, restricted to a subset of streams. -/ +abbrev RestrictedNext [WithNextRelation σ α] (p : σ → Prop) (s t : σ) : Prop := + p t ∧ Next s t + +theorem next_of_next?_eq_some [WithNextRelation σ α] {s t : σ} : + Std.Stream.next? s = some (x, t) → Next t s := WithNextRelation.rel_of_next?_eq_some + +/-- Class for well-founded stream type. -/ +protected class WellFounded.{u,v} (σ : Type u) (α : outParam <| Type v) extends + WithNextRelation σ α, WellFoundedRelation σ + +/-- Define a well-founded stream type instance from a well-founded relation. -/ +def WellFounded.ofRelation.{u,v} [Std.Stream.{u,v} σ α] [inst : WellFoundedRelation σ] + (h : {s t : σ} → {x : α} → Std.Stream.next? s = (x, t) → WellFoundedRelation.rel t s) : + Stream.WellFounded σ α := { inst with rel_of_next?_eq_some := h } + +/-- Define a well-founded stream type instance from a measure. -/ +def WellFounded.ofMeasure.{u,v} [Std.Stream.{u,v} σ α] (f : σ → Nat) + (h : {s t : σ} → {x : α} → Std.Stream.next? s = (x, t) → f t < f s) : + Stream.WellFounded σ α := { measure f with rel_of_next?_eq_some := h } + +macro_rules +| `(tactic| decreasing_trivial) => + `(tactic| apply Stream.next_of_next?_eq_some; assumption) + +instance (α) : Stream.WellFounded (List α) α := + .ofMeasure List.length <| by + intro s t x h + match s, h with + | _ :: _, h => + simp [Std.Stream.next?] at h + obtain ⟨rfl, rfl⟩ := h + simp + +/-- Class for a finite stream of type `σ`. -/ +protected class Finite [WithNextRelation σ α] (s : σ) : Prop where + /-- A finite stream is accessible with respect to the `Next` relation. -/ + acc : Acc Next s + +instance [Stream.WellFounded σ α] (s : σ) : Stream.Finite s where + acc := match WellFounded.wf (σ := σ) with | ⟨h⟩ => h s + +namespace Finite + +/-- Predecessor of a finite stream is finite. -/ +theorem ofNext [WithNextRelation σ α] {s t : σ} [Stream.Finite s] (h : Next t s) : + Stream.Finite t where + acc := match acc (s := s) with | ⟨_, a⟩ => a _ h + +/-- Predecessor of a finite stream is finite. -/ +theorem ofSome [WithNextRelation σ α] {s t : σ} [Stream.Finite s] + (h : Std.Stream.next? s = some (x, t)) : Stream.Finite t := .ofNext <| next_of_next?_eq_some h + +/-- Wrap a finite stream into a well-founded type for use in termination proofs. -/ +def wrap [WithNextRelation σ α] (s : σ) [Stream.Finite s] : { s : σ // Acc Next s } := + ⟨s, Finite.acc⟩ + +end Finite + +/-- Folds a monadic function over a finite stream from left to right. -/ +@[inline, specialize] +def foldlM [Monad m] [WithNextRelation σ α] (s : σ) [Stream.Finite s] (f : β → α → m β) + (init : β) : m β := + match h : Std.Stream.next? s with + | none => pure init + | some (x, t) => + have : Stream.Finite t := .ofSome h + f init x >>= foldlM t f +termination_by Finite.wrap s + +theorem foldlM_none [Monad m] [WithNextRelation σ α] {s : σ} [Stream.Finite s] + {f : β → α → m β} (h : Std.Stream.next? s = none) : foldlM s f init = pure init := by + simp only [foldlM] + split <;> simp_all + +theorem foldlM_some [Monad m] [WithNextRelation σ α] {s t : σ} [Stream.Finite s] + [Stream.Finite t] + {f : β → α → m β} (h : Std.Stream.next? s = some (x, t)) : + foldlM s f init = f init x >>= foldlM t f := by + simp only [foldlM] + split + · simp_all + · next heq => + simp only [h, Option.some.injEq, Prod.mk.injEq] at heq + cases heq.1; cases heq.2; rfl + +/-- Folds a monadic function over a finite stream from right to left. -/ +@[specialize] +def foldrM [Monad m] [WithNextRelation σ α] (s : σ) [Stream.Finite s] (f : α → β → m β) + (init : β) : m β := + match h : Std.Stream.next? s with + | none => pure init + | some (x, t) => + have : Stream.Finite t := .ofSome h + foldrM t f init >>= f x +termination_by Finite.wrap s + +theorem foldrM_none [Monad m] [WithNextRelation σ α] {s : σ} [Stream.Finite s] + {f : α → β → m β} (h : Std.Stream.next? s = none) : foldrM s f init = pure init := by + rw [foldrM]; split <;> simp_all + +theorem foldrM_some [Monad m] [WithNextRelation σ α] {s t : σ} [Stream.Finite s] + [Stream.Finite t] + {f : α → β → m β} (h : Std.Stream.next? s = some (x, t)) : + foldrM s f init = foldrM t f init >>= f x := by + rw [foldrM]; split <;> simp_all + +/-- Folds a function over a finite stream from left to right. -/ +@[specialize] +def foldl [WithNextRelation σ α] (s : σ) [Stream.Finite s] (f : β → α → β) (init : β) : β := + match h : Std.Stream.next? s with + | none => init + | some (x, t) => + have : Stream.Finite t := .ofSome h + foldl t f (f init x) +termination_by Finite.wrap s + +theorem foldl_none [WithNextRelation σ α] {s : σ} [Stream.Finite s] {f : β → α → β} + (h : Std.Stream.next? s = none) : foldl s f init = init := by + rw [foldl]; split <;> simp_all + +theorem foldl_some [WithNextRelation σ α] {s t : σ} [Stream.Finite s] [Stream.Finite t] + {f : β → α → β} (h : Std.Stream.next? s = some (x, t)) : + foldl s f init = foldl t f (f init x) := by + rw [foldl]; split <;> simp_all + +/-- Folds a function over a finite stream from right to left. -/ +@[specialize] +def foldr [WithNextRelation σ α] (s : σ) [Stream.Finite s] (f : α → β → β) (init : β) : β := + match h : Std.Stream.next? s with + | none => init + | some (x, t) => + have : Stream.Finite t := .ofSome h + f x <| foldr t f init +termination_by Finite.wrap s + +theorem foldr_none [WithNextRelation σ α] {s : σ} [Stream.Finite s] + {f : α → β → β} (h : Std.Stream.next? s = none) : foldr s f init = init := by + rw [foldr] + split <;> simp_all + +theorem foldr_some [WithNextRelation σ α] {s t : σ} [Stream.Finite s] [Stream.Finite t] + {f : α → β → β} (h : Std.Stream.next? s = some (x, t)) : + foldr s f init = f x (foldr t f init) := by + rw [foldr] + split <;> simp_all + +/-- Extract the length of a finite stream. -/ +@[inline] +def length [WithNextRelation σ α] (s : σ) [Stream.Finite s] : Nat := + foldl s (fun l _ => l + 1) 0 + +theorem length_none [WithNextRelation σ α] {s : σ} [Stream.Finite s] + (h : Std.Stream.next? s = none) : length s = 0 := foldl_none h + +private theorem length_aux [WithNextRelation σ α] {s : σ} [Stream.Finite s] : + foldl s (fun l _ => l + 1) n = foldl s (fun l _ => l + 1) 0 + n := by + match h : Std.Stream.next? s with + | none => simp [foldl_none h] + | some (x, t) => + have : Stream.Finite t := .ofSome h + conv => lhs; rw [foldl_some h, length_aux] + conv => rhs; rw [foldl_some h, length_aux] + simp +arith +termination_by Finite.wrap s + +theorem length_some [WithNextRelation σ α] {s t : σ} [Stream.Finite s] [Stream.Finite t] + (h : Std.Stream.next? s = some (x, t)) : length s = length t + 1 := by + simp [length, foldl_some h, length_aux (n := 1)] + +/-- Extract the sequence of values of a finite stream as a `List` in reverse order. -/ +@[inline] +def toListRev [WithNextRelation σ α] (s : σ) [Stream.Finite s] : List α := + foldl s (fun r x => x :: r) [] + +theorem toListRev_none [WithNextRelation σ α] {s : σ} [Stream.Finite s] + (h : Std.Stream.next? s = none) : toListRev s = [] := by + simp [toListRev, foldl_none h] + +private theorem toListRev_aux [WithNextRelation σ α] {s : σ} [Stream.Finite s] : + foldl s (fun r x => x :: r) l = foldl s (fun r x => x :: r) [] ++ l := by + match h : Std.Stream.next? s with + | none => simp [foldl_none h] + | some (x, t) => + have : Stream.Finite t := .ofSome h + conv => lhs; rw [foldl_some h, toListRev_aux] + conv => rhs; rw [foldl_some h, toListRev_aux] + simp +termination_by Finite.wrap s + +theorem toListRev_some [WithNextRelation σ α] {s t : σ} [Stream.Finite s] [Stream.Finite t] + (h : Std.Stream.next? s = some (x, t)) : toListRev s = toListRev t ++ [x] := by + simp [toListRev, foldl_some h, toListRev_aux (l := [x])] + +/-- Extract the sequence of values of a finite stream as a `List`. -/ +@[inline] +def toList [WithNextRelation σ α] (s : σ) [Stream.Finite s] : List α := + toListRev s |>.reverse + +theorem toList_none [WithNextRelation σ α] {s : σ} [Stream.Finite s] + (h : Std.Stream.next? s = none) : toList s = [] := by + simp [toList, toListRev_none h] + +theorem toList_some [WithNextRelation σ α] {s t : σ} [Stream.Finite s] [Stream.Finite t] + (h : Std.Stream.next? s = some (x, t)) : toList s = x :: toList t := by + simp [toList, toListRev_some h] + +/-- Extract the sequence of values of a finite stream as an `Array`. -/ +@[inline] +def toArray [WithNextRelation σ α] (s : σ) [Stream.Finite s] : Array α := + foldl s Array.push #[] + +theorem toArray_none [WithNextRelation σ α] {s : σ} [Stream.Finite s] + (h : Std.Stream.next? s = none) : toArray s = #[] := by + simp [toArray, foldl_none h] + +private theorem toArray_aux [WithNextRelation σ α] {s : σ} [Stream.Finite s] : + foldl s Array.push l = l ++ foldl s Array.push #[] := by + match h : Std.Stream.next? s with + | none => simp [foldl_none h] + | some (x, t) => + have : Stream.Finite t := .ofSome h + conv => lhs; rw [foldl_some h, toArray_aux] + conv => rhs; rw [foldl_some h, toArray_aux] + simp +termination_by Finite.wrap s + +theorem toArray_some [WithNextRelation σ α] {s t : σ} [Stream.Finite s] [Stream.Finite t] + (h : Std.Stream.next? s = some (x, t)) : toArray s = #[x] ++ toArray t := by + simp [toArray, foldl_some h, toArray_aux (l := #[x])] + +theorem toArray_toList_eq_toArray [WithNextRelation σ α] {s : σ} [Stream.Finite s] : + (toList s).toArray = toArray s := by + match h : Std.Stream.next? s with + | none => simp [toList_none h, toArray_none h] + | some (x, t) => + have : Stream.Finite t := .ofSome h + simp only [toList_some h, toArray_some h] + rw [List.toArray_cons, toArray_toList_eq_toArray] +termination_by Finite.wrap s + +@[simp] +theorem toList_eq_self (l : List α) : toList l = l := by + induction l with + | nil => rw [toList_none rfl] + | cons x l ih => rw [toList_some rfl, ih] + +end Stream diff --git a/lakefile.toml b/lakefile.toml index 462fae72..dc7525ca 100644 --- a/lakefile.toml +++ b/lakefile.toml @@ -11,6 +11,9 @@ name = "UnicodeBasic" git = "https://github.com/fgdorais/lean4-unicode-basic" rev = "main" +[[lean_lib]] +name = "WellFoundedStreams" + [[lean_lib]] name = "Parser" From deb6e2e0e73164e93fcbaad506d380988c41e68a Mon Sep 17 00:00:00 2001 From: Nicolas Rouquette Date: Thu, 26 Feb 2026 21:26:50 -0800 Subject: [PATCH 2/2] feat: make fold combinators total via remaining-based termination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 'remaining : σ → Nat' field to Parser.Stream class, providing a termination measure for fold combinators. All six partial defs in Parser/Parser.lean and Parser/Basic.lean are now total: - efoldlPAux (core left-fold loop): uses termination_by remaining s - foldr (right fold): rewritten with explicit state-passing where-helper - takeUntil, dropUntil, countUntil: rewritten with explicit loops - count: now total transitively (calls foldl which calls efoldlPAux) Each fold iteration checks at runtime that 'remaining' decreases after both the element parser and fold function have run. If no progress is made, the fold terminates (returning accumulated value or an error). This prevents non-termination even with parsers that succeed without consuming input, while preserving identical behavior for well-behaved parsers that always consume on success. Stream instances provide remaining as: - String.Slice: utf8ByteSize - Substring.Raw: bsize (stopPos - startPos in bytes) - Subarray: stop - start - ByteSlice: size - OfList: next.length - mkDefault: 0 (fold combinators will not function) The six RegEx partial defs (RegEx.foldr, RegEx.match, re0-re3) are left as-is; they are a separate concern. Build: 208 jobs pass. --- Parser/Basic.lean | 89 ++++++++++++++++++++++++++++++++-------------- Parser/Parser.lean | 15 +++++--- Parser/Stream.lean | 15 ++++++++ 3 files changed, 88 insertions(+), 31 deletions(-) diff --git a/Parser/Basic.lean b/Parser/Basic.lean index 75d7d125..b525f29f 100644 --- a/Parser/Basic.lean +++ b/Parser/Basic.lean @@ -129,13 +129,21 @@ def test (p : ParserT ε σ τ m α) : ParserT ε σ τ m Bool := /-- `foldr f p q` -/ @[inline] -partial def foldr (f : α → β → β) (p : ParserT ε σ τ m α) (q : ParserT ε σ τ m β) : - ParserT ε σ τ m β := - try - let x ← withBacktracking p - let y ← foldr f p q - return f x y - catch _ => q +def foldr (f : α → β → β) (p : ParserT ε σ τ m α) (q : ParserT ε σ τ m β) : + ParserT ε σ τ m β := fun s => foldrAux s +where + foldrAux (s : σ) : m (Parser.Result ε σ β) := + let savePos := Stream.getPosition s + p s >>= fun + | .ok s' x => + if _h : Stream.remaining s' < Stream.remaining s then + foldrAux s' >>= fun + | .ok s'' y => return .ok s'' (f x y) + | .error s'' e => return .error s'' e + else + q (Stream.setPosition s' savePos) + | .error s' _ => q (Stream.setPosition s' savePos) + termination_by Stream.remaining s /-! ### `take` family -/ @@ -194,15 +202,25 @@ def takeManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array array of returned values of `p` and the output of `stop`. If `p` fails before `stop` is encountered, the error from `p` is reported and no input is consumed. -/ -partial def takeUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : +def takeUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α × β) := have := Inhabited.mk do return ((#[] : Array α), (← stop)) - withBacktracking do rest #[] + withBacktracking fun s => rest #[] s where - rest [Inhabited (ParserT ε σ τ m (Array α × β))] (acc : Array α) := do - match ← option? stop with - | some y => return (acc, y) - | none => rest <| acc.push (← p) + rest [Inhabited (ParserT ε σ τ m (Array α × β))] (acc : Array α) (s : σ) : + m (Parser.Result ε σ (Array α × β)) := + (option? stop) s >>= fun + | .ok s' (some y) => return .ok s' (acc, y) + | .ok s' none => + p s' >>= fun + | .ok s'' x => + if _h : Stream.remaining s'' < Stream.remaining s then + rest (acc.push x) s'' + else + return .error s'' (Error.unexpected (Stream.getPosition s'') none) + | .error s'' e => return .error s'' e + | .error s' e => return .error s' e + termination_by Stream.remaining s /-! ### `drop` family -/ @@ -256,13 +274,22 @@ def dropManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit : outputs from `p`. If `p` fails before encountering `stop` then the error from `p` is reported and no input is consumed. -/ -partial def dropUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := - withBacktracking loop +def dropUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := + withBacktracking fun s => loop s where - loop := do - match ← option? stop with - | some s => return s - | none => p *> loop + loop (s : σ) : m (Parser.Result ε σ β) := + (option? stop) s >>= fun + | .ok s' (some v) => return .ok s' v + | .ok s' none => + p s' >>= fun + | .ok s'' _ => + if _h : Stream.remaining s'' < Stream.remaining s then + loop s'' + else + return .error s'' (Error.unexpected (Stream.getPosition s'') none) + | .error s'' e => return .error s'' e + | .error s' e => return .error s' e + termination_by Stream.remaining s /-! `count` family -/ @@ -271,7 +298,7 @@ where successes. -/ @[inline] -partial def count (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat := +def count (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat := foldl (fun n _ => n+1) 0 p /-- @@ -294,15 +321,25 @@ where the count of successes and the output of `stop`. If `p` fails before encountering `stop` then the error from `p` is reported and no input is consumed. -/ -partial def countUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : +def countUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Nat × β) := do let _ := Inhabited.mk do return (0, ← stop) - withBacktracking do loop 0 + withBacktracking fun s => loop 0 s where - loop [Inhabited (ParserT ε σ τ m (Nat × β))] (ct : Nat) := do - match ← option? stop with - | some s => return (ct, s) - | none => p *> loop (ct+1) + loop [Inhabited (ParserT ε σ τ m (Nat × β))] (ct : Nat) (s : σ) : + m (Parser.Result ε σ (Nat × β)) := + (option? stop) s >>= fun + | .ok s' (some v) => return .ok s' (ct, v) + | .ok s' none => + p s' >>= fun + | .ok s'' _ => + if _h : Stream.remaining s'' < Stream.remaining s then + loop (ct+1) s'' + else + return .error s'' (Error.unexpected (Stream.getPosition s'') none) + | .error s'' e => return .error s'' e + | .error s' e => return .error s' e + termination_by Stream.remaining s /-! ### `endBy` family -/ diff --git a/Parser/Parser.lean b/Parser/Parser.lean index 80b6284c..a2843ecf 100644 --- a/Parser/Parser.lean +++ b/Parser/Parser.lean @@ -186,15 +186,20 @@ def withErrorMessage (msg : String) (p : ParserT ε σ τ m α) : ParserT ε σ /-! ### `foldl` family -/ @[specialize] -private partial def efoldlPAux [Inhabited ε] [Inhabited σ] [Inhabited β] +private def efoldlPAux [Inhabited ε] [Inhabited σ] [Inhabited β] (f : β → α → ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (y : β) (s : σ) : m (Parser.Result ε σ (β × ε × Bool)) := let savePos := Stream.getPosition s p s >>= fun - | .ok s x => f y x s >>= fun - | .ok s y => efoldlPAux f p y s - | .error s e => return .ok (Stream.setPosition s savePos) (y, e, true) - | .error s e => return .ok (Stream.setPosition s savePos) (y, e, false) + | .ok s' x => f y x s' >>= fun + | .ok s'' y => + if _h : Stream.remaining s'' < Stream.remaining s then + efoldlPAux f p y s'' + else + return .ok (Stream.setPosition s'' savePos) (y, default, true) + | .error s' e => return .ok (Stream.setPosition s' savePos) (y, e, true) + | .error s' e => return .ok (Stream.setPosition s' savePos) (y, e, false) +termination_by Stream.remaining s /-- `foldlP f init p` folds the parser function `f` from left to right using `init` as an intitial diff --git a/Parser/Stream.lean b/Parser/Stream.lean index 7e1e082b..5e8c1090 100644 --- a/Parser/Stream.lean +++ b/Parser/Stream.lean @@ -36,6 +36,15 @@ protected class Parser.Stream (σ : Type _) (τ : outParam (Type _)) extends Std Position : Type _ getPosition : σ → Position setPosition : σ → Position → σ + /-- Returns a natural number measure of remaining input. + + This is used by fold combinators as a termination measure to guarantee totality. + Each call to `next?` that returns `some` must strictly decrease `remaining`. + + Fold combinators check at runtime that `remaining` decreases after each parser + iteration, so even an incorrect implementation cannot cause non-termination. + However, an incorrect implementation may cause folds to terminate prematurely. -/ + remaining : σ → Nat attribute [reducible, inherit_doc Parser.Stream] Parser.Stream.Position attribute [inherit_doc Parser.Stream] Parser.Stream.getPosition Parser.Stream.setPosition @@ -64,12 +73,14 @@ instance (σ τ) [self : Std.Stream σ τ] : Parser.Stream (mkDefault σ τ) τ Position := σ getPosition s := s setPosition _ p := p + remaining _ := 0 -- fold combinators will not function with mkDefault @[reducible] instance : Parser.Stream String.Slice Char where Position := String.Slice getPosition s := s setPosition _ s := s + remaining s := s.utf8ByteSize @[reducible] instance : Parser.Stream Substring.Raw Char where @@ -80,6 +91,7 @@ instance : Parser.Stream Substring.Raw Char where { s with startPos := p } else { s with startPos := s.stopPos } + remaining s := s.bsize @[reducible] instance (τ) : Parser.Stream (Subarray τ) τ where @@ -90,12 +102,14 @@ instance (τ) : Parser.Stream (Subarray τ) τ where ⟨{ s.internalRepresentation with start := p, start_le_stop := h }⟩ else ⟨{ s.internalRepresentation with start := s.stop, start_le_stop := Nat.le_refl s.stop }⟩ + remaining s := s.stop - s.start @[reducible] instance : Parser.Stream ByteSlice UInt8 where Position := Nat getPosition s := s.start setPosition s p := s.slice p + remaining s := s.size /-- `OfList` is a view of a list stream that keeps track of consumed tokens. -/ structure OfList (τ : Type _) where @@ -133,5 +147,6 @@ instance (τ) : Parser.Stream (OfList τ) τ where match s with | ⟨x :: rest, past⟩ => some (x, ⟨rest, x :: past⟩) | _ => none + remaining s := s.next.length end Parser.Stream