diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e47fee5..d05fd1a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: uses: actions/checkout@v4 - name: Setup Haskell Stack - uses: haskell/actions/setup@v2 + uses: haskell-actions/setup@v2 with: enable-stack: true diff --git a/.gitignore b/.gitignore index ac547a9..972ef57 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,4 @@ stack.yaml.lock tags .*.swp .qodo -SCRATCH +SCRATCH* diff --git a/CHANGELOG.md b/CHANGELOG.md index bca2c02..693daf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ # Changelog for `distributors` +## 0.3.0.0 - 2026-02-05 + +### New Modules + +- `Control.Lens.Grammar` - Grammar hierarchy based on Chomsky's formal grammar classification +- `Control.Lens.Grammar.BackusNaur` - Context-free grammar combinators (BNF) +- `Control.Lens.Grammar.Boole` - Boolean algebra for grammars +- `Control.Lens.Grammar.Kleene` - Regular expression combinators +- `Control.Lens.Grammar.Symbol` - Symbol-level grammar primitives +- `Control.Lens.Grammar.Token` - Token-level grammar primitives +- `Data.Profunctor.Filtrator` - Filterable profunctors +- `Data.Profunctor.Grammar` - Grammar profunctor abstraction +- `Data.Profunctor.Monadic` - Monadic profunctor combinators with QualifiedDo support +- `Data.Profunctor.Monoidal` - Monoidal profunctor combinators + +### Removed Modules + +- `Text.Grammar.Distributor` - Functionality split into the new modules above + +### Testing + +- Added `doctest` for documentation testing +- New test examples: Arithmetic, Json, Lambda, LenVec, RegString, SemVer, SExpr + ## 0.2.0.0 - 2025-07-08 Added some combinators for `RegEx`es. Updated documentation. diff --git a/LICENSE b/LICENSE index d43363b..c969d4b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright Eitan Chatav (c) 2024 +Copyright Eitan Chatav (c) 2026 All rights reserved. 
diff --git a/README.md b/README.md index 4a68cc1..f5992d9 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,8 @@ The term "distributor" is a synonym for "[profunctor](https://ncatlab.org/nlab/s Since "profunctor" became the standard nomenclature, we reappropriate "distributor" to describe a profunctor on a [distributive category](https://ncatlab.org/nlab/show/distributive+category). -This library provides a study of `Monoidal` profunctors, `Distributor`s, `Alternator`s and `Filtrator`s. These profunctor constraints are analogous to `Applicative`, `Alternative` and `Filterable` functors. Examples of `Distributor`s will include printers and parsers, and it is demonstrated how to write a single term for both. Profunctors naturally give rise to optics and this library also studies some previously discovered optics, `PartialIso`s, `Monocle`s, `Grate`s and `Wither`s and also defines new optics, `Diopter`s and `Bifocal`s. Finally, an application of distributors is demonstrated by unifying Backus-Naur form grammars with invertible parsers, giving users a powerful playground for language syntax design. +This library provides a study of distributor theory and optics, +and their application to invertible grammar theory. ## previous work @@ -28,6 +29,11 @@ While `Distributor`s in the library are lax distributive endoprofunctors, a math The idea for unifying Backus-Naur grammars with parsers comes from Joachim Breitner in a post [Showcasing Applicative](https://www.joachim-breitner.de/blog/710-Showcasing_Applicative). +The idea for using monadic profunctors for reversible context-free grammars +comes from Li-Yao Xia, in some posts and a paper but particularly +this post on [Monadic profunctors for bidirectional programming] +(https://blog.poisson.chat/posts/2017-01-01-monadic-profunctors.html). 
+ The person deserving the most credit for bringing the power of optics to programming, with his [lens library](https://github.com/ekmett/lens/), is Ed Kmett, to whom I am very grateful for teaching me a lot. None of the ideas in this library are particularly original and a lot of related ideas have been explored, in Tom Ellis' [product-profunctors](https://github.com/tomjaguarpaw/product-profunctors) as well as Sjoerd Visscher's [one-liner](https://github.com/sjoerdvisscher/one-liner) and more. Such explorations are _not_ limited to Haskell. Brandon Williams and Stephen Celis' excellent [swift-parsing](https://github.com/pointfreeco/swift-parsing) was also influenced by invertible parser theory. diff --git a/blog.md b/blog.md index 30caca7..e4c6a40 100644 --- a/blog.md +++ b/blog.md @@ -368,12 +368,12 @@ Just as `Alternative` has 0-or-more `many` and 0-or-1 `optional` combinators, we ```Haskell optionalP :: Distributor p => p a b -> p (Maybe a) (Maybe b) -optionalP p = mapIso maybeEot (oneP >+< p) +optionalP p = mapIso eotMaybe (oneP >+< p) manyP :: p a b -> p [a] [b] manyP p = mapIso listEot (oneP >+< p >*< manyP p) -maybeEot :: Iso (Maybe a) (Maybe b) (Either () a) (Either () b) +eotMaybe :: Iso (Maybe a) (Maybe b) (Either () a) (Either () b) listEot :: (Cons s s a a, AsEmpty t, Cons t t b b) diff --git a/distributors.cabal b/distributors.cabal index b442c11..9a92b07 100644 --- a/distributors.cabal +++ b/distributors.cabal @@ -1,11 +1,11 @@ cabal-version: 2.2 --- This file has been generated from package.yaml by hpack version 0.37.0. +-- This file has been generated from package.yaml by hpack version 0.39.1. -- -- see: https://github.com/sol/hpack name: distributors -version: 0.2.0.1 +version: 0.3.0.0 synopsis: Unifying Parsers, Printers & Grammars description: Distributors provides mathematically inspired abstractions for coders to write parsers that can also be inverted to printers. 
category: Profunctors, Optics, Parsing @@ -13,7 +13,7 @@ homepage: https://github.com/morphismtech/distributors#readme bug-reports: https://github.com/morphismtech/distributors/issues author: Eitan Chatav maintainer: eitan.chatav@gmail.com -copyright: 2025 Eitan Chatav +copyright: 2026 Eitan Chatav license: BSD-3-Clause license-file: LICENSE build-type: Simple @@ -30,13 +30,22 @@ library exposed-modules: Control.Lens.Bifocal Control.Lens.Diopter + Control.Lens.Grammar + Control.Lens.Grammar.BackusNaur + Control.Lens.Grammar.Boole + Control.Lens.Grammar.Kleene + Control.Lens.Grammar.Symbol + Control.Lens.Grammar.Token Control.Lens.Grate Control.Lens.Internal.NestedPrismTH Control.Lens.Monocle Control.Lens.PartialIso Control.Lens.Wither Data.Profunctor.Distributor - Text.Grammar.Distributor + Data.Profunctor.Filtrator + Data.Profunctor.Grammar + Data.Profunctor.Monadic + Data.Profunctor.Monoidal other-modules: Paths_distributors autogen-modules: @@ -49,7 +58,9 @@ library ConstraintKinds DataKinds DefaultSignatures + DeriveFoldable DeriveFunctor + DeriveTraversable DeriveGeneric DerivingStrategies DerivingVia @@ -65,12 +76,13 @@ library LambdaCase MagicHash MonoLocalBinds - OverloadedStrings + QualifiedDo QuantifiedConstraints RankNTypes RecursiveDo ScopedTypeVariables StandaloneDeriving + StandaloneKindSignatures TemplateHaskell TupleSections TypeApplications @@ -80,28 +92,36 @@ library UndecidableSuperClasses ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints build-depends: - adjunctions >=4.4 && <5 - , base >=4.7 && <5 - , bifunctors >=5.6 && <6 + MemoTrie >=0.6 && <1 + , adjunctions >=4.4 && <5 + , base >=4.15 && <5 + , bifunctors >=5.5 && <6 , bytestring >=0.11 && <1 , containers >=0.6 && <1 , contravariant >=1.5 && <2 , distributive >=0.6 && <1 - , lens >=5.2 && <6 - , mtl >=2.3 && <3 + , lens >=5.0 && <6 + , mtl >=2.2 && <3 , 
profunctors >=5.6 && <6 , tagged >=0.8 && <1 - , template-haskell + , template-haskell >=2.17 && <3 , text ==2.* - , th-abstraction - , vector >=0.13 && <1 + , th-abstraction >=0.4 && <1 + , vector >=0.12 && <1 , witherable >=0.4 && <1 default-language: Haskell2010 -test-suite spec +test-suite test type: exitcode-stdio-1.0 - main-is: Spec.hs + main-is: Main.hs other-modules: + Examples.Arithmetic + Examples.Json + Examples.Lambda + Examples.LenVec + Examples.RegString + Examples.SemVer + Examples.SExpr Paths_distributors autogen-modules: Paths_distributors @@ -113,7 +133,9 @@ test-suite spec ConstraintKinds DataKinds DefaultSignatures + DeriveFoldable DeriveFunctor + DeriveTraversable DeriveGeneric DerivingStrategies DerivingVia @@ -129,12 +151,13 @@ test-suite spec LambdaCase MagicHash MonoLocalBinds - OverloadedStrings + QualifiedDo QuantifiedConstraints RankNTypes RecursiveDo ScopedTypeVariables StandaloneDeriving + StandaloneKindSignatures TemplateHaskell TupleSections TypeApplications @@ -144,22 +167,24 @@ test-suite spec UndecidableSuperClasses ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints build-depends: - adjunctions >=4.4 && <5 - , base >=4.7 && <5 - , bifunctors >=5.6 && <6 + MemoTrie >=0.6 && <1 + , adjunctions >=4.4 && <5 + , base >=4.15 && <5 + , bifunctors >=5.5 && <6 , bytestring >=0.11 && <1 , containers >=0.6 && <1 , contravariant >=1.5 && <2 , distributive >=0.6 && <1 , distributors - , hspec - , lens >=5.2 && <6 - , mtl >=2.3 && <3 + , doctest >=0.18 && <1 + , hspec >=2.7 && <3 + , lens >=5.0 && <6 + , mtl >=2.2 && <3 , profunctors >=5.6 && <6 , tagged >=0.8 && <1 - , template-haskell + , template-haskell >=2.17 && <3 , text ==2.* - , th-abstraction - , vector >=0.13 && <1 + , th-abstraction >=0.4 && <1 + , vector >=0.12 && <1 , witherable >=0.4 && <1 default-language: Haskell2010 diff --git a/package.yaml 
b/package.yaml index aee12c3..3f0209e 100644 --- a/package.yaml +++ b/package.yaml @@ -1,17 +1,16 @@ name: distributors -version: 0.2.0.1 +version: 0.3.0.0 github: "morphismtech/distributors" license: BSD-3-Clause author: "Eitan Chatav" maintainer: "eitan.chatav@gmail.com" -copyright: "2025 Eitan Chatav" +copyright: "2026 Eitan Chatav" extra-source-files: - README.md extra-doc-files: - CHANGELOG.md - category: Profunctors, Optics, Parsing synopsis: Unifying Parsers, Printers & Grammars description: @@ -19,21 +18,22 @@ description: for coders to write parsers that can also be inverted to printers. dependencies: -- base >= 4.7 && < 5 +- base >= 4.15 && < 5 - adjunctions >= 4.4 && < 5 -- bifunctors >= 5.6 && < 6 +- bifunctors >= 5.5 && < 6 - bytestring >= 0.11 && < 1 - containers >= 0.6 && < 1 - contravariant >= 1.5 && < 2 - distributive >= 0.6 && < 1 -- lens >= 5.2 && < 6 -- mtl >= 2.3 && < 3 +- lens >= 5.0 && < 6 +- MemoTrie >= 0.6 && < 1 +- mtl >= 2.2 && < 3 - profunctors >= 5.6 && < 6 - tagged >= 0.8 && < 1 -- template-haskell +- template-haskell >= 2.17 && < 3 - text >= 2 && < 3 -- th-abstraction -- vector >= 0.13 && < 1 +- th-abstraction >= 0.4 && < 1 +- vector >= 0.12 && < 1 - witherable >= 0.4 && < 1 ghc-options: @@ -56,7 +56,9 @@ default-extensions: - ConstraintKinds - DataKinds - DefaultSignatures +- DeriveFoldable - DeriveFunctor +- DeriveTraversable - DeriveGeneric - DerivingStrategies - DerivingVia @@ -72,12 +74,13 @@ default-extensions: - LambdaCase - MagicHash - MonoLocalBinds -- OverloadedStrings +- QualifiedDo - QuantifiedConstraints - RankNTypes - RecursiveDo - ScopedTypeVariables - StandaloneDeriving +- StandaloneKindSignatures - TemplateHaskell - TupleSections - TypeApplications @@ -87,9 +90,10 @@ default-extensions: - UndecidableSuperClasses tests: - spec: - main: Spec.hs + test: + main: Main.hs source-dirs: test dependencies: - distributors - - hspec + - doctest >= 0.18 && < 1 + - hspec >= 2.7 && < 3 diff --git a/src/Control/Lens/Bifocal.hs 
b/src/Control/Lens/Bifocal.hs index de62f16..6a09aaf 100644 --- a/src/Control/Lens/Bifocal.hs +++ b/src/Control/Lens/Bifocal.hs @@ -1,7 +1,7 @@ {- | Module : Control.Lens.Bifocal Description : bifocals -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional @@ -17,10 +17,6 @@ module Control.Lens.Bifocal , mapBifocal , cloneBifocal , withBifocal - , chainedl1 - , chainedr1 - , chainedl - , chainedr -- * Binocular , Binocular (..), runBinocular -- * Prismoid @@ -28,6 +24,8 @@ module Control.Lens.Bifocal , somed , lefted , righted + , chained1 + , chained -- * Filtroid , Filtroid , unlefted @@ -40,6 +38,7 @@ import Control.Lens.Internal.Profunctor import Control.Lens.PartialIso import Data.Profunctor import Data.Profunctor.Distributor +import Data.Profunctor.Filtrator import Witherable {- | `Bifocal`s are bidirectional parser optics. @@ -114,37 +113,30 @@ unrighted :: Filtroid a b (Either c a) (Either d b) unrighted = unwrapPafb . snd . filtrate . WrapPafb {- | -Left associate a binary constructor pattern to sequence one or more times. +Associate a binary constructor pattern to sequence one or more times. -} -chainedl1 :: APartialIso a b (a,a) (b,b) -> Bifocal a b a b -chainedl1 pat = unwrapPafb . chainl1 pat noSep . WrapPafb - - -{- | -Right associate a binary constructor pattern to sequence one or more times. --} -chainedr1 :: APartialIso a b (a,a) (b,b) -> Bifocal a b a b -chainedr1 pat = unwrapPafb . chainr1 pat noSep . WrapPafb - -{- | -Left associate a binary constructor pattern to sequence one or more times, -or use a nilary constructor pattern to sequence zero times. --} -chainedl :: APartialIso a b (a,a) (b,b) -> APartialIso a b () () -> Bifocal a b a b -chainedl c2 c0 = unwrapPafb . chainl c2 c0 noSep . WrapPafb +chained1 + :: (forall x. x -> Either x x) + -> APartialIso a b (a,a) (b,b) + -> Prismoid a b a b +chained1 assoc binPat = unwrapPafb . 
chain1 assoc binPat noSep . WrapPafb {- | -Right associate a binary constructor pattern to sequence one or more times, +Associate a binary constructor pattern to sequence one or more times, or use a nilary constructor pattern to sequence zero times. -} -chainedr :: APartialIso a b (a,a) (b,b) -> APartialIso a b () () -> Bifocal a b a b -chainedr c2 c0 = unwrapPafb . chainr c2 c0 noSep . WrapPafb +chained + :: (forall x. x -> Either x x) + -> APartialIso a b (a,a) (b,b) + -> APrism a b () () + -> Prismoid a b a b +chained assoc binPat nilPat = unwrapPafb . chain assoc binPat nilPat noSep . WrapPafb {- | Run `ABifocal` over an `Alternative` & `Filterable`. -} withBifocal :: (Alternative f, Filterable f) => ABifocal s t a b -> ((s -> Maybe a) -> f b) -> f t -withBifocal bif = unBinocular (catMaybes (bif (Just <$> anyToken))) +withBifocal bif = unBinocular (catMaybes (bif (Just <$> Binocular ($ Just)))) {- | `Binocular` provides an efficient concrete representation of `Bifocal`s. -} @@ -153,8 +145,6 @@ newtype Binocular a b s t = Binocular :: forall f. (Alternative f, Filterable f) => ((s -> Maybe a) -> f b) -> f t } -instance Tokenized a b (Binocular a b) where - anyToken = Binocular ($ Just) instance Profunctor (Binocular a b) where dimap f g (Binocular k) = Binocular $ fmap g . k . (. (. f)) instance Functor (Binocular a b s) where fmap = rmap diff --git a/src/Control/Lens/Diopter.hs b/src/Control/Lens/Diopter.hs index 266e2b5..c78b06c 100644 --- a/src/Control/Lens/Diopter.hs +++ b/src/Control/Lens/Diopter.hs @@ -1,7 +1,7 @@ {- | Module : Control.Lens.Diopter Description : diopters -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional @@ -55,7 +55,7 @@ withDiopter :: ADiopter s t a b -> (forall h. 
Homogeneous h => (s -> h a) -> (h b -> t) -> r) -> r -withDiopter dio k = case (runIdentity <$> dio (Identity <$> anyToken)) of +withDiopter dio k = case runIdentity <$> dio (Identity <$> Dioptrice Par1 unPar1) of Dioptrice f g -> k f g {- | Action of `ADiopter` on `Distributor`s. -} @@ -94,8 +94,6 @@ data Dioptrice a b s t where => (s -> h a) -> (h b -> t) -> Dioptrice a b s t -instance Tokenized a b (Dioptrice a b) where - anyToken = Dioptrice Par1 unPar1 instance Profunctor (Dioptrice a b) where dimap f g (Dioptrice sa bt) = Dioptrice (sa . f) (g . bt) instance Functor (Dioptrice a b s) where fmap = rmap diff --git a/src/Control/Lens/Grammar.hs b/src/Control/Lens/Grammar.hs new file mode 100644 index 0000000..27ce10b --- /dev/null +++ b/src/Control/Lens/Grammar.hs @@ -0,0 +1,806 @@ +{- | +Module : Control.Lens.Grammar +Description : grammar hierarchy +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable + +See Chomsky, [On Certain Formal Properties of Grammars] +(https://somr.info/lib/Chomsky_1959.pdf) +-} + +module Control.Lens.Grammar + ( -- * Regular grammar + RegGrammar + , Lexical + , RegString (..) + , regstringG + , regexGrammar + -- * Context-free grammar + , Grammar + , RegBnf (..) 
+ , regbnfG + , regbnfGrammar + -- * Context-sensitive grammar + , CtxGrammar + , printG + , parseG + , unparseG + -- * Utility + , putStringLn + ) where + +import Control.Applicative +import Control.Lens +import Control.Lens.PartialIso +import Control.Lens.Grammar.BackusNaur +import Control.Lens.Grammar.Boole +import Control.Lens.Grammar.Kleene +import Control.Lens.Grammar.Token +import Control.Lens.Grammar.Symbol +import Data.Maybe hiding (mapMaybe) +import Data.Monoid +import Data.Profunctor.Distributor +import Data.Profunctor.Filtrator +import Data.Profunctor.Monadic +import Data.Profunctor.Monoidal +import Data.Profunctor.Grammar +import qualified Data.Set as Set +import Data.String +import GHC.Exts +import Prelude hiding (filter) +import Witherable + +{- | +A regular grammar may be constructed using +`Lexical` and `Alternator` combinators. +Let's see an example using +[semantic versioning](https://semver.org/). + +>>> import Numeric.Natural (Natural) +>>> :{ +data SemVer = SemVer -- e.g., 2.1.5-rc.1+build.123 + { major :: Natural -- e.g., 1 + , minor :: Natural -- e.g., 2 + , patch :: Natural -- e.g., 3 + , preRelease :: [String] -- e.g., "alpha.1", "rc.2" + , buildMetadata :: [String] -- e.g., "build.123", "20130313144700" + } + deriving (Eq, Ord, Show, Read) +:} + +We'd like to define an optic @_SemVer@, +corresponding to the constructor pattern @SemVer@. +Unfortunately, we can't use TemplateHaskell to generate it in [GHCi] +(https://wiki.haskell.org/GHC/GHCi), +which is used to test this documentation. +Normally we would write `makeNestedPrisms` @''SemVer@, +but here is equivalent explicit Haskell code instead. +Since @SemVer@ has a single constructor, @_SemVer@ can be an `Control.Lens.Iso.Iso`.
+ +>>> :set -XRecordWildCards +>>> import Control.Lens (Iso', iso) +>>> :{ +_SemVer :: Iso' SemVer (Natural, (Natural, (Natural, ([String], [String])))) +_SemVer = iso + (\SemVer {..} -> (major, (minor, (patch, (preRelease, buildMetadata))))) + (\(major, (minor, (patch, (preRelease, buildMetadata)))) -> SemVer {..}) +:} + +Now we can build a `RegGrammar` for @SemVer@ using the "idiom" style of +`Applicative` parsing with a couple modifications. + +>>> :{ +semverGrammar :: RegGrammar Char SemVer +semverGrammar = _SemVer + >? numberG + >*< terminal "." >* numberG + >*< terminal "." >* numberG + >*< option [] (terminal "-" >* identifiersG) + >*< option [] (terminal "+" >* identifiersG) + where + numberG = iso show read >~ someP (asIn @Char DecimalNumber) + identifiersG = several1 (sepBy (terminal ".")) (someP charG) + charG = asIn LowercaseLetter + <|> asIn UppercaseLetter + <|> asIn DecimalNumber + <|> token '-' +:} + +Instead of using the constructor @SemVer@ with the `Functor` applicator `<$>`, +we use the optic @_SemVer@ we defined and the `Choice` applicator `>?`; +although, we could have used the `Profunctor` applicator `>~` instead, +because @_SemVer@ is an `Control.Lens.Iso.Iso`. A few `Alternative` +combinators like `<|>` work both `Functor`ially and `Profunctor`ially. 
+ ++------------+---------------+ +| Functorial | Profunctorial | ++============+===============+ +| @SemVer@ | @_SemVer@ | ++------------+---------------+ +| `<$>` | `>?` | ++------------+---------------+ +| `*>` | `>*` | ++------------+---------------+ +| `<*` | `*<` | ++------------+---------------+ +| `<*>` | `>*<` | ++------------+---------------+ +| `<|>` | `<|>` | ++------------+---------------+ +| `option` | `option` | ++------------+---------------+ +| `choice` | `choice` | ++------------+---------------+ +| `many` | `manyP` | ++------------+---------------+ +| `some` | `someP` | ++------------+---------------+ +| `optional` | `optionalP` | ++------------+---------------+ + +You can generate a `RegString` from a `RegGrammar` with `regstringG`. + +>>> putStringLn (regstringG semverGrammar) +\p{Nd}+(.\p{Nd}+(.\p{Nd}+((-((\p{Ll}|\p{Lu}|\p{Nd}|-)+(.(\p{Ll}|\p{Lu}|\p{Nd}|-)+)*))?(\+((\p{Ll}|\p{Lu}|\p{Nd}|-)+(.(\p{Ll}|\p{Lu}|\p{Nd}|-)+)*))?))) + +You can also generate parsers and printers. + +>>> [parsed | (parsed, "") <- parseG semverGrammar "2.1.5-rc.1+build.123"] +[SemVer {major = 2, minor = 1, patch = 5, preRelease = ["rc","1"], buildMetadata = ["build","123"]}] + +Parsing `uncons`es tokens left-to-right, from the beginning of a string. +Unparsing, on the other hand, `snoc`s tokens left-to-right, to the end of a string. + +>>> unparseG semverGrammar (SemVer 1 0 0 ["alpha"] []) "SemVer: " :: Maybe String +Just "SemVer: 1.0.0-alpha" + +Printing, on the gripping hand, `cons`es tokens right-to-left, to the beginning of a string. + +>>> ($ " is the SemVer.") <$> printG semverGrammar (SemVer 1 2 3 [] []) :: Maybe String +Just "1.2.3 is the SemVer." + +`Profunctor`ial combinators give us correct-by-construction invertible parsers. +New `RegGrammar` generators can be defined with new instances of `Lexical` `Alternator`s. +-} +type RegGrammar token a = forall p. 
+ ( Lexical token p + , Alternator p + ) => p a a + +{- | Context-free `Grammar`s add two capabilities to `RegGrammar`s, +coming from the `BackusNaurForm` interface + +* `rule` abstraction, +* and general recursion. + +`regexGrammar` and `regbnfGrammar` are examples of context-free +`Grammar`s. Regular expressions are a form of expression algebra. +Let's see a similar but simpler example, +the algebra of arithmetic expressions of natural numbers. + +>>> import Numeric.Natural (Natural) +>>> :{ +data Arith + = Num Natural + | Add Arith Arith + | Mul Arith Arith + deriving stock (Eq, Ord, Show, Read) +:} + +Here are `Control.Lens.Prism.Prism`s for the constructor patterns. + +>>> import Control.Lens (Prism', prism') +>>> :{ +_Num :: Prism' Arith Natural +_Num = prism' Num (\case Num n -> Just n; _ -> Nothing) +_Add, _Mul :: Prism' Arith (Arith, Arith) +_Add = prism' (uncurry Add) (\case Add x y -> Just (x,y); _ -> Nothing) +_Mul = prism' (uncurry Mul) (\case Mul x y -> Just (x,y); _ -> Nothing) +:} + +Now we can build a `Grammar` for @Arith@ +by combining "idiom" style with named `rule`s, +and tying the recursive loop +(caused by parenthesization) +with `ruleRec`. + +>>> :{ +arithGrammar :: Grammar Char Arith +arithGrammar = ruleRec "arith" sumG + where + sumG arith = rule "sum" $ + chain1 Left _Add (sepBy (terminal "+")) (prodG arith) + prodG arith = rule "product" $ + chain1 Left _Mul (sepBy (terminal "*")) (factorG arith) + factorG arith = rule "factor" $ + numberG <|> terminal "(" >* arith *< terminal ")" + numberG = rule "number" $ + _Num . iso show read >? someP (asIn @Char DecimalNumber) +:} + +We can generate a `RegBnf`, printers and parsers from @arithGrammar@. 
+ +>>> putStringLn (regbnfG arithGrammar) +{start} = \q{arith} +{arith} = \q{sum} +{factor} = \q{number}|\(\q{arith}\) +{number} = \p{Nd}+ +{product} = \q{factor}(\*\q{factor})* +{sum} = \q{product}(\+\q{product})* + +>>> [x | (x,"") <- parseG arithGrammar "1+2*3+4"] +[Add (Add (Num 1) (Mul (Num 2) (Num 3))) (Num 4)] +>>> unparseG arithGrammar (Add (Num 1) (Mul (Num 2) (Num 3))) "" :: Maybe String +Just "1+2*3" +>>> do pr <- printG arithGrammar (Num 69); return (pr "") :: Maybe String +Just "69" + +-} +type Grammar token a = forall p. + ( Lexical token p + , forall x. BackusNaurForm (p x x) + , Alternator p + ) => p a a + +{- | +In addition to context-sensitivity via `Monadic` combinators, +`CtxGrammar`s adds general filtration via `Filtrator` to `Grammar`s. + +>>> :{ +palindromeG :: CtxGrammar Char String +palindromeG = rule "palindrome" $ + satisfied (\wrd -> reverse wrd == wrd) >?< manyP (anyToken @Char) +:} + +The `satisfied` pattern is used together with the `Choice` & +`Data.Profunctor.Cochoice` applicator `>?<` for general filtration. +For context-sensitivity, +the `Monadic` interface is used by importing "Data.Profunctor.Monadic" +qualified and using a "bonding" notation which mixes +"idiom" style with qualified do-notation. +Let's use length-encoded vectors of numbers as an example. + +>>> import Numeric.Natural (Natural) +>>> import Control.Lens.Iso (Iso', iso) +>>> :set -XRecordWildCards +>>> :{ +data LenVec = LenVec {length :: Natural, vector :: [Natural]} + deriving (Eq, Ord, Show, Read) +_LenVec :: Iso' LenVec (Natural, [Natural]) +_LenVec = iso (\LenVec {..} -> (length, vector)) (\(length, vector) -> LenVec {..}) +:} + +>>> :set -XQualifiedDo +>>> import qualified Data.Profunctor.Monadic as P +>>> :{ +lenvecGrammar :: CtxGrammar Char LenVec +lenvecGrammar = _LenVec >? 
P.do + let + numberG = iso show read >~ someP (asIn @Char DecimalNumber) + vectorG n = intercalateP n (sepBy (terminal ",")) numberG + len <- numberG -- bonds to _LenVec + terminal ";" -- doesn't bond + vectorG (fromIntegral len) -- bonds to _LenVec +:} + +The qualified do-notation changes the signature of +@P.@`Data.Profunctor.Monadic.>>=`, +so that we must apply the constructor pattern @_LenVec@ +to the do-block with the `>?` applicator. +Any bound named variable, @var <- action@, +gets "bonded" to the constructor pattern. +Any unbound actions, except for the last action in the do-block, +does not get bonded to the pattern. +The last action does get bonded to the pattern. +Any unnamed bound action, @_ <- action@, +also gets bonded to the pattern, +but being unnamed means it isn't added to the context. +If all bound actions are unnamed, then a `CtxGrammar` can +be rewritten as a `Grammar` since it is context-free. +We can't generate a `RegBnf` since the `rule`s +of a `CtxGrammar` aren't static, but dynamic and contextual. +We can generate parsers and printers as expected. + +>>> [vec | (vec, "") <- parseG lenvecGrammar "3;1,2,3"] :: [LenVec] +[LenVec {length = 3, vector = [1,2,3]}] +>>> [vec | (vec, "") <- parseG lenvecGrammar "0;1,2,3"] :: [LenVec] +[] +>>> [pr "" | pr <- printG lenvecGrammar (LenVec 2 [6,7])] :: [String] +["2;6,7"] +>>> [pr "" | pr <- printG lenvecGrammar (LenVec 200 [100])] :: [String] +[] +>>> [pal | word <- ["racecar", "word"], (pal, "") <- parseG palindromeG word] +["racecar"] +-} +type CtxGrammar token a = forall p. + ( Lexical token p + , forall x. BackusNaurForm (p x x) + , Alternator p + , Filtrator p + , Monadic p + ) => p a a + +{- | +`Lexical` combinators include + +* `terminal` symbols from "Control.Lens.Grammar.Symbol"; +* `Tokenized` combinators from "Control.Lens.Grammar.Token"; +* `tokenClass`es from "Control.Lens.Grammar.Boole". +-} +type Lexical token p = + ( forall x y. 
(x ~ (), y ~ ()) => TerminalSymbol token (p x y) + , forall x y. (x ~ token, y ~ token) => TokenAlgebra token (p x y) + ) :: Constraint + +{- | `RegString`s are an embedded domain specific language +of regular expression strings. Since they are strings, +they have a string-like interface. + +>>> let rex = fromString "ab|c" :: RegString +>>> putStringLn rex +ab|c +>>> rex +"ab|c" + +`RegString`s can be generated from `RegGrammar`s with `regstringG`. + +>>> regstringG (terminal "a" >* terminal "b" <|> terminal "c") +"ab|c" + +`RegString`s are actually stored as an algebraic datatype, `RegEx`. + +>>> runRegString rex +RegExam (Alternate (Terminal "ab") (Terminal "c")) + +`RegString`s are similar to regular expression strings in many other +programming languages. We can use them to see if a word and pattern +are `Matching`. + +>>> "ab" =~ rex +True +>>> "c" =~ rex +True +>>> "xyz" =~ rex +False + +Like `RegGrammar`s, `RegString`s can use all the `Lexical` combinators. +Unlike `RegGrammar`s, instead of using `Monoidal` and `Alternator` combinators, +`RegString`s use `Monoid` and `KleeneStarAlgebra` combinators. + +>>> terminal "a" <> terminal "b" >|< terminal "c" :: RegString +"ab|c" +>>> mempty :: RegString +"" + +Since `RegString`s are a `KleeneStarAlgebra`, +they support Kleene quantifiers. + +>>> starK rex +"(ab|c)*" +>>> plusK rex +"(ab|c)+" +>>> optK rex +"(ab|c)?" + +Like other regular expression languages, `RegString`s support +character classes. + +>>> oneOf "abc" :: RegString +"[abc]" +>>> notOneOf "abc" :: RegString +"[^abc]" + +The character classes are used for failure, matching no character or string, +as well as the wildcard, matching any single character. + +>>> zeroK :: RegString +"[]" +>>> anyToken :: RegString +"[^]" + +Additional forms of character classes test for a character's `GeneralCategory`. 
+ +>>> asIn LowercaseLetter :: RegString +"\\p{Ll}" +>>> notAsIn Control :: RegString +"\\P{Cc}" + +`KleeneStarAlgebra`s support alternation `>|<`, +and the `Tokenized` combinators are all negatable. +However, we'd like to be able to take the +intersection of character classes as well. +`RegString`s can combine characters' `tokenClass`es +using `BooleanAlgebra` combinators. + +>>> tokenClass (notOneOf "abc" >&&< notOneOf "xyz") :: RegString +"[^abcxyz]" +>>> tokenClass (oneOf "abcxyz" >&&< notOneOf "xyz") :: RegString +"[abc]" +>>> tokenClass (notOneOf "#$%" >&&< notAsIn Control) :: RegString +"[^#$%\\P{Cc}]" +>>> tokenClass (allB notAsIn [MathSymbol, Control]) :: RegString +"\\P{Sm|Cc}" +>>> tokenClass (notB (oneOf "xyz")) :: RegString +"[^xyz]" + +Ill-formed `RegString`s normalize to failure. + +>>> fromString ")(" :: RegString +"[]" +-} +newtype RegString = RegString {runRegString :: RegEx Char} + deriving newtype + ( Eq, Ord + , Semigroup, Monoid, KleeneStarAlgebra + , Tokenized Char, TokenAlgebra Char + , TerminalSymbol Char, NonTerminalSymbol + , Matching String + ) + +{- | `RegBnf`s are an embedded domain specific language +of Backus-Naur forms extended by regular expression strings. +Like `RegString`s they have a string-like interface. + +>>> let bnf = fromString "{start} = foo|bar" :: RegBnf +>>> putStringLn bnf +{start} = foo|bar +>>> bnf +"{start} = foo|bar" + +`RegBnf`s can be generated from context-free `Grammar`s with `regbnfG`. + +>>> :type regbnfG regbnfGrammar +regbnfG regbnfGrammar :: RegBnf + +Like `RegString`s, `RegBnf`s can be constructed using +`Lexical`, `Monoid` and `KleeneStarAlgebra` combinators. +But they also support `BackusNaurForm` `rule`s and `ruleRec`s. 
+-} +newtype RegBnf = RegBnf {runRegBnf :: Bnf RegString} + deriving newtype + ( Eq, Ord + , Semigroup, Monoid, KleeneStarAlgebra + , Tokenized Char, TokenAlgebra Char + , TerminalSymbol Char, NonTerminalSymbol + , BackusNaurForm + ) +instance Matching String RegBnf where + word =~ pattern = word =~ liftBnf1 runRegString (runRegBnf pattern) + +makeNestedPrisms ''Bnf +makeNestedPrisms ''RegEx +makeNestedPrisms ''RegExam +makeNestedPrisms ''CategoryTest +makeNestedPrisms ''GeneralCategory +makeNestedPrisms ''RegString +makeNestedPrisms ''RegBnf + +{- | `regexGrammar` is a context-free `Grammar` for `RegString`s. +It can't be a `RegGrammar`, since `RegString`s include parenthesization. +But [balanced parentheses](https://en.wikipedia.org/wiki/Dyck_language) +are a context-free language. + +>>> putStringLn (regbnfG regexGrammar) +{start} = \q{regex} +{alternate} = \q{sequence}(\|\q{sequence})* +{atom} = (\\q\{)\q{char}*\}|\q{char}|\q{char-class}|\(\q{regex}\) +{category} = Ll|Lu|Lt|Lm|Lo|Mn|Mc|Me|Nd|Nl|No|Pc|Pd|Ps|Pe|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp|Cc|Cf|Cs|Co|Cn +{category-test} = (\\p\{)\q{category}\}|(\\P\{)(\q{category}(\|\q{category})*)\} +{char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped} +{char-any} = \[\^\] +{char-class} = \q{fail}|\q{char-any}|\q{one-of}|\q{not-one-of}|\q{category-test} +{char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC +{char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control} +{expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom} +{fail} = \[\] +{not-one-of} = (\[\^)\q{char}+(\q{category-test}?\]) +{one-of} = \[\q{char}+\] +{regex} = \q{alternate} +{sequence} = \q{char}*|\q{expression}* +-} +regexGrammar :: Grammar Char RegString +regexGrammar = _RegString >~ ruleRec "regex" altG + where + altG rex = rule "alternate" $ 
+ chain1 Left (_RegExam . _Alternate) (sepBy (terminal "|")) (seqG rex) + + seqG rex = rule "sequence" $ choice + [ _Terminal >? manyP charG + , chain Left _Sequence (_Terminal . _Empty) noSep (exprG rex) + ] + + exprG rex = rule "expression" $ choice + [ _KleeneOpt >? atomG rex *< terminal "?" + , _KleeneStar >? atomG rex *< terminal "*" + , _KleenePlus >? atomG rex *< terminal "+" + , atomG rex + ] + + atomG rex = rule "atom" $ choice + [ _NonTerminal >? terminal "\\q{" >* manyP charG *< terminal "}" + , _Terminal >? charG >:< asEmpty + , _RegExam >? classG + , terminal "(" >* rex *< terminal ")" + ] + + catTestG = rule "category-test" $ choice + [ _AsIn >? terminal "\\p{" >* categoryG *< terminal "}" + , _NotAsIn >? several1 (sepBy (terminal "|")) + { beginBy = terminal "\\P{" + , endBy = terminal "}" + } categoryG + ] + + categoryG = rule "category" $ choice + [ _LowercaseLetter >? terminal "Ll" + , _UppercaseLetter >? terminal "Lu" + , _TitlecaseLetter >? terminal "Lt" + , _ModifierLetter >? terminal "Lm" + , _OtherLetter >? terminal "Lo" + , _NonSpacingMark >? terminal "Mn" + , _SpacingCombiningMark >? terminal "Mc" + , _EnclosingMark >? terminal "Me" + , _DecimalNumber >? terminal "Nd" + , _LetterNumber >? terminal "Nl" + , _OtherNumber >? terminal "No" + , _ConnectorPunctuation >? terminal "Pc" + , _DashPunctuation >? terminal "Pd" + , _OpenPunctuation >? terminal "Ps" + , _ClosePunctuation >? terminal "Pe" + , _InitialQuote >? terminal "Pi" + , _FinalQuote >? terminal "Pf" + , _OtherPunctuation >? terminal "Po" + , _MathSymbol >? terminal "Sm" + , _CurrencySymbol >? terminal "Sc" + , _ModifierSymbol >? terminal "Sk" + , _OtherSymbol >? terminal "So" + , _Space >? terminal "Zs" + , _LineSeparator >? terminal "Zl" + , _ParagraphSeparator >? terminal "Zp" + , _Control >? terminal "Cc" + , _Format >? terminal "Cf" + , _Surrogate >? terminal "Cs" + , _PrivateUse >? terminal "Co" + , _NotAssigned >? 
terminal "Cn" + ] + + classG = rule "char-class" $ choice + [ _Fail >? failG + , _Pass >? anyG + , _OneOf >? oneOfG + , _NotOneOf >? notOneOfG + , _NotOneOf >? pure Set.empty >*< catTestG + ] + + failG = rule "fail" $ terminal "[]" + + anyG = rule "char-any" $ terminal "[^]" + + oneOfG = rule "one-of" $ terminal "[" >* several1 noSep charG *< terminal "]" + + notOneOfG = rule "not-one-of" $ + terminal "[^" >* several1 noSep charG + >*< option (NotAsIn Set.empty) catTestG + *< terminal "]" + +charG :: Grammar Char Char +charG = rule "char" $ + tokenClass (notOneOf charsReserved >&&< notAsIn Control) + <|> terminal "\\" >* charEscapedG + where + charEscapedG = rule "char-escaped" $ + oneOf charsReserved <|> charControlG + + charsReserved = "()*+?[\\]^{|}" + + charControlG = rule "char-control" $ choice + [ only '\NUL' >? terminal "NUL" + , only '\SOH' >? terminal "SOH" + , only '\STX' >? terminal "STX" + , only '\ETX' >? terminal "ETX" + , only '\EOT' >? terminal "EOT" + , only '\ENQ' >? terminal "ENQ" + , only '\ACK' >? terminal "ACK" + , only '\BEL' >? terminal "BEL" + , only '\BS' >? terminal "BS" + , only '\HT' >? terminal "HT" + , only '\LF' >? terminal "LF" + , only '\VT' >? terminal "VT" + , only '\FF' >? terminal "FF" + , only '\CR' >? terminal "CR" + , only '\SO' >? terminal "SO" + , only '\SI' >? terminal "SI" + , only '\DLE' >? terminal "DLE" + , only '\DC1' >? terminal "DC1" + , only '\DC2' >? terminal "DC2" + , only '\DC3' >? terminal "DC3" + , only '\DC4' >? terminal "DC4" + , only '\NAK' >? terminal "NAK" + , only '\SYN' >? terminal "SYN" + , only '\ETB' >? terminal "ETB" + , only '\CAN' >? terminal "CAN" + , only '\EM' >? terminal "EM" + , only '\SUB' >? terminal "SUB" + , only '\ESC' >? terminal "ESC" + , only '\FS' >? terminal "FS" + , only '\GS' >? terminal "GS" + , only '\RS' >? terminal "RS" + , only '\US' >? terminal "US" + , only '\DEL' >? terminal "DEL" + , only '\x80' >? terminal "PAD" + , only '\x81' >? terminal "HOP" + , only '\x82' >? 
terminal "BPH" + , only '\x83' >? terminal "NBH" + , only '\x84' >? terminal "IND" + , only '\x85' >? terminal "NEL" + , only '\x86' >? terminal "SSA" + , only '\x87' >? terminal "ESA" + , only '\x88' >? terminal "HTS" + , only '\x89' >? terminal "HTJ" + , only '\x8A' >? terminal "VTS" + , only '\x8B' >? terminal "PLD" + , only '\x8C' >? terminal "PLU" + , only '\x8D' >? terminal "RI" + , only '\x8E' >? terminal "SS2" + , only '\x8F' >? terminal "SS3" + , only '\x90' >? terminal "DCS" + , only '\x91' >? terminal "PU1" + , only '\x92' >? terminal "PU2" + , only '\x93' >? terminal "STS" + , only '\x94' >? terminal "CCH" + , only '\x95' >? terminal "MW" + , only '\x96' >? terminal "SPA" + , only '\x97' >? terminal "EPA" + , only '\x98' >? terminal "SOS" + , only '\x99' >? terminal "SGCI" + , only '\x9A' >? terminal "SCI" + , only '\x9B' >? terminal "CSI" + , only '\x9C' >? terminal "ST" + , only '\x9D' >? terminal "OSC" + , only '\x9E' >? terminal "PM" + , only '\x9F' >? terminal "APC" + ] + +{- | +`regbnfGrammar` is a context-free `Grammar` for `RegBnf`s. +That means that it can generate a self-hosted definition. 
+ +>>> putStringLn (regbnfG regbnfGrammar) +{start} = \q{regbnf} +{alternate} = \q{sequence}(\|\q{sequence})* +{atom} = (\\q\{)\q{char}*\}|\q{char}|\q{char-class}|\(\q{regex}\) +{category} = Ll|Lu|Lt|Lm|Lo|Mn|Mc|Me|Nd|Nl|No|Pc|Pd|Ps|Pe|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp|Cc|Cf|Cs|Co|Cn +{category-test} = (\\p\{)\q{category}\}|(\\P\{)(\q{category}(\|\q{category})*)\} +{char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped} +{char-any} = \[\^\] +{char-class} = \q{fail}|\q{char-any}|\q{one-of}|\q{not-one-of}|\q{category-test} +{char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC +{char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control} +{expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom} +{fail} = \[\] +{not-one-of} = (\[\^)\q{char}+(\q{category-test}?\]) +{one-of} = \[\q{char}+\] +{regbnf} = (\{start\} = )\q{regex}(\LF\q{rule})* +{regex} = \q{alternate} +{rule} = \{\q{char}*(\} = )\q{regex} +{sequence} = \q{char}*|\q{expression}* +-} +regbnfGrammar :: Grammar Char RegBnf +regbnfGrammar = rule "regbnf" $ _RegBnf . _Bnf >~ + terminal "{start} = " >* regexGrammar + >*< several noSep (terminal "\n" >* ruleG) + where + ruleG = rule "rule" $ terminal "{" >* manyP charG *< terminal "} = " + >*< regexGrammar + +{- | `regstringG` generates a `RegString` from a regular grammar. +Since context-free `Grammar`s and `CtxGrammar`s aren't necessarily regular, +the type system will prevent `regstringG` from being applied to them. +-} +regstringG :: RegGrammar Char a -> RegString +regstringG rex = runGrammor rex + +{- | `regbnfG` generates a `RegBnf` from a context-free `Grammar`. +Since `CtxGrammar`s aren't context-free, +the type system will prevent `regbnfG` from being applied to a `CtxGrammar`. +It can apply to a `RegGrammar`. 
+-} +regbnfG :: Grammar Char a -> RegBnf +regbnfG bnf = runGrammor bnf + +{- | `printG` generates a printer from a `CtxGrammar`. +Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s, +the type system will allow `printG` to be applied to them. +Running the printer on a syntax value returns a function +that `cons`es tokens at the beginning of an input string, +from right to left. +-} +printG + :: Cons string string token token + => (IsList string, Item string ~ token, Categorized token) + => (Alternative m, Monad m, Filterable m) + => CtxGrammar token a + -> a {- ^ syntax -} + -> m (string -> string) +printG printor = printP printor + +{- | `parseG` generates a parser from a `CtxGrammar`. +Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s, +the type system will allow `parseG` to be applied to them. +Running the parser on an input string value `uncons`es +tokens from the beginning of an input string from left to right, +returning a syntax value and the remaining output string. +-} +parseG + :: (Cons string string token token, Snoc string string token token) + => (IsList string, Item string ~ token, Categorized token) + => (Alternative m, Monad m, Filterable m) + => CtxGrammar token a + -> string {- ^ input -} + -> m (a, string) +parseG parsor = parseP parsor + +{- | `unparseG` generates an unparser from a `CtxGrammar`. +Since both `RegGrammar`s and context-free `Grammar`s are `CtxGrammar`s, +the type system will allow `unparseG` to be applied to them. +Running the unparser on a syntax value and an input string +`snoc`s tokens at the end of the string, from left to right, +returning the output string. 
+-} +unparseG + :: (Cons string string token token, Snoc string string token token) + => (IsList string, Item string ~ token, Categorized token) + => (Alternative m, Monad m, Filterable m) + => CtxGrammar token a + -> a {- ^ syntax -} + -> string {- ^ input -} + -> m string +unparseG parsor = unparseP parsor + +{- | `putStringLn` is a utility that generalizes `putStrLn` +to string-like interfaces such as `RegString` and `RegBnf`. +-} +putStringLn :: (IsList string, Item string ~ Char) => string -> IO () +putStringLn = putStrLn . toList + +instance IsList RegString where + type Item RegString = Char + fromList + = fromMaybe zeroK + . listToMaybe + . mapMaybe prsF + . parseP regexGrammar + where + prsF (rex,"") = Just rex + prsF _ = Nothing + toList + = maybe "[]" ($ "") + . printP regexGrammar +instance IsString RegString where + fromString = fromList +instance Show RegString where + showsPrec precision = showsPrec precision . toList +instance Read RegString where + readsPrec _ str = [(fromList str, "")] +instance IsList RegBnf where + type Item RegBnf = Char + fromList + = fromMaybe zeroK + . listToMaybe + . mapMaybe prsF + . parseP regbnfGrammar + where + prsF (regbnf,"") = Just regbnf + prsF _ = Nothing + toList + = maybe "{start} = []" ($ "") + . printP regbnfGrammar +instance IsString RegBnf where + fromString = fromList +instance Show RegBnf where + showsPrec precision = showsPrec precision . 
toList +instance Read RegBnf where + readsPrec _ str = [(fromList str, "")] diff --git a/src/Control/Lens/Grammar/BackusNaur.hs b/src/Control/Lens/Grammar/BackusNaur.hs new file mode 100644 index 0000000..8beba91 --- /dev/null +++ b/src/Control/Lens/Grammar/BackusNaur.hs @@ -0,0 +1,189 @@ +{- | +Module : Control.Lens.Grammar.BackusNaur +Description : Backus-Naur forms & pattern matching +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable + +See Naur & Backus, et al. +[Report on the Algorithmic Language ALGOL 60] +(https://softwarepreservation.computerhistory.org/ALGOL/report/Algol60_report_CACM_1960_June.pdf) +-} + +module Control.Lens.Grammar.BackusNaur + ( -- * BackusNaurForm + BackusNaurForm (..) + , Bnf (..) + , liftBnf0 + , liftBnf1 + , liftBnf2 + -- * Matching + , Matching (..) + , diffB + ) where + +import Control.Lens +import Control.Lens.Extras +import Control.Lens.Grammar.Boole +import Control.Lens.Grammar.Kleene +import Control.Lens.Grammar.Token +import Control.Lens.Grammar.Symbol +import Data.Coerce +import Data.Foldable +import Data.Function +import Data.MemoTrie +import qualified Data.Set as Set +import Data.Set (Set) + +{- | `BackusNaurForm` grammar combinators formalize +`rule` abstraction and general recursion. Context-free +`Control.Lens.Grammar.Grammar`s support the `BackusNaurForm` interface. +-} +class BackusNaurForm bnf where + + {- | For a `BackusNaurForm` parser instance, + `rule` can be used to detail parse errors. + + prop> rule name bnf = ruleRec name (\_ -> bnf) + -} + rule :: String -> bnf -> bnf + rule _ = id + + {- | General recursion, using `ruleRec`, rules can refer to themselves. -} + ruleRec :: String -> (bnf -> bnf) -> bnf + ruleRec _ = fix + +{- | A `Bnf` consists of a distinguished starting rule +and a set of named rules, supporting the `BackusNaurForm` interface. 
-} +data Bnf rule = Bnf + { startBnf :: rule + , rulesBnf :: Set (String, rule) + } deriving stock (Eq, Ord, Show, Read) + +{- | Lift a rule to a `Bnf`. -} +liftBnf0 :: Ord a => a -> Bnf a +liftBnf0 a = Bnf a mempty + +{- | Lift a function of rules to `Bnf`s. -} +liftBnf1 :: (Coercible a b, Ord b) => (a -> b) -> Bnf a -> Bnf b +liftBnf1 f (Bnf start rules) = Bnf (f start) (Set.map coerce rules) + +{- | Lift a binary function of rules to `Bnf`s. -} +liftBnf2 + :: (Coercible a c, Coercible b c, Ord c) + => (a -> b -> c) -> Bnf a -> Bnf b -> Bnf c +liftBnf2 f (Bnf start0 rules0) (Bnf start1 rules1) = + Bnf (f start0 start1) (Set.map coerce rules0 <> Set.map coerce rules1) + +-- | Does a word match a pattern? +class Matching word pattern | pattern -> word where + (=~) :: word -> pattern -> Bool + infix 2 =~ + +{- | +The [Brzozowski derivative] +(https://dl.acm.org/doi/pdf/10.1145/321239.321249) of a +`RegEx`tended `Bnf`, with memoization. + +prop> word =~ diffB prefix pattern = prefix <> word =~ pattern +-} +diffB + :: (Categorized token, HasTrie token) + => [token] -> Bnf (RegEx token) -> Bnf (RegEx token) +diffB prefix (Bnf start rules) = + Bnf (foldl' (flip diff1B) start prefix) rules + where + -- derivative wrt 1 token, memoized + diff1B = memo2 $ \x -> \case + Terminal [] -> zeroK + Terminal (tokenY:streamY) -> + if x == tokenY then terminal streamY else zeroK + NonTerminal nameY -> anyK (diff1B x) (rulesNamed nameY rules) + Sequence y1 y2 -> + if δ (Bnf y1 rules) then y1'y2 >|< y1y2' else y1'y2 + where + y1'y2 = diff1B x y1 <> y2 + y1y2' = y1 <> diff1B x y2 + KleeneStar y -> diff1B x y <> starK y + KleeneOpt y -> diff1B x y + KleenePlus y -> diff1B x y <> starK y + RegExam Fail -> zeroK + RegExam Pass -> mempty + RegExam (OneOf chars) -> + if x `elem` chars then mempty else zeroK + RegExam (NotOneOf chars (AsIn cat)) -> + if elem x chars || categorize x /= cat + then zeroK else mempty + RegExam (NotOneOf chars (NotAsIn cats)) -> + if elem x chars || elem 
(categorize x) cats + then zeroK else mempty + RegExam (Alternate y1 y2) -> diff1B x y1 >|< diff1B x y2 + +-- | Does a pattern match the empty word? +δ :: (Categorized token, HasTrie token) + => Bnf (RegEx token) -> Bool +δ (Bnf start rules) = ν start where + ν = memo $ \case + Terminal [] -> True + KleeneStar _ -> True + KleeneOpt _ -> True + KleenePlus y -> ν y + Sequence y1 y2 -> ν y1 && ν y2 + RegExam (Alternate y1 y2) -> ν y1 || ν y2 + NonTerminal nameY -> any ν (rulesNamed nameY rules) + _ -> False + +rulesNamed :: Ord rule => String -> Set (String, rule) -> Set rule +rulesNamed nameX = foldl' (flip inserter) Set.empty where + inserter (nameY,y) = + if nameX == nameY then Set.insert y else id + +-- instances +instance (Ord rule, NonTerminalSymbol rule) + => BackusNaurForm (Bnf rule) where + rule name = ruleRec name . const + ruleRec name f = + let + newStart = nonTerminal name + Bnf newRule oldRules = f (Bnf newStart mempty) + newRules = Set.insert (name, newRule) oldRules + in + Bnf newStart newRules +instance (Ord rule, TerminalSymbol token rule) + => TerminalSymbol token (Bnf rule) where + terminal = liftBnf0 . terminal +instance (Ord rule, NonTerminalSymbol rule) + => NonTerminalSymbol (Bnf rule) where + nonTerminal = liftBnf0 . nonTerminal +instance (Ord rule, Tokenized token rule) + => Tokenized token (Bnf rule) where + anyToken = liftBnf0 anyToken + token = liftBnf0 . token + oneOf = liftBnf0 . oneOf + notOneOf = liftBnf0 . notOneOf + asIn = liftBnf0 . asIn + notAsIn = liftBnf0 . notAsIn +instance (Ord rule, TokenAlgebra token rule) + => TokenAlgebra token (Bnf rule) where + tokenClass = liftBnf0 . 
tokenClass +instance (Ord rule, KleeneStarAlgebra rule) + => KleeneStarAlgebra (Bnf rule) where + starK = liftBnf1 starK + plusK = liftBnf1 plusK + optK = liftBnf1 optK + zeroK = liftBnf0 zeroK + (>|<) = liftBnf2 (>|<) +instance (Ord rule, Monoid rule) => Monoid (Bnf rule) where + mempty = liftBnf0 mempty +instance (Ord rule, Semigroup rule) => Semigroup (Bnf rule) where + (<>) = liftBnf2 (<>) +instance (Categorized token, HasTrie token) + => Matching [token] (Bnf (RegEx token)) where + (=~) word = δ . diffB word +instance (Categorized token, HasTrie token) + => Matching [token] (RegEx token) where + word =~ pattern = word =~ liftBnf0 pattern +instance Matching s (APrism s t a b) where + word =~ pattern = is pattern word diff --git a/src/Control/Lens/Grammar/Boole.hs b/src/Control/Lens/Grammar/Boole.hs new file mode 100644 index 0000000..f5a5cef --- /dev/null +++ b/src/Control/Lens/Grammar/Boole.hs @@ -0,0 +1,210 @@ +{- | +Module : Control.Lens.Grammar.Boole +Description : Boolean algebras & token classes +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable + +See Boole, [The Mathematical Analysis of Logic] +(https://www.gutenberg.org/files/36884/36884-pdf.pdf). +Categorized token classes form a Boolean algebra. +-} + +module Control.Lens.Grammar.Boole + ( -- * BooleanAlgebra + BooleanAlgebra (..) + , andB, orB, allB, anyB + -- * TokenAlgebra + , TokenAlgebra (..) + , TokenTest (..) + ) where + +import Control.Applicative +import Control.Lens.Grammar.Kleene +import Control.Lens.Grammar.Token +import Data.Foldable +import Data.Monoid +import Data.Profunctor +import Data.Profunctor.Distributor +import qualified Data.Set as Set +import GHC.Generics + +-- | A `BooleanAlgebra`, like `Bool`, supporting classical logical operations. 
+class BooleanAlgebra b where + + -- | conjunction + (>&&<) :: b -> b -> b + default (>&&<) + :: (b ~ f bool, BooleanAlgebra bool, Applicative f) => b -> b -> b + (>&&<) = liftA2 (>&&<) + + -- | disjunction + (>||<) :: b -> b -> b + default (>||<) + :: (b ~ f bool, BooleanAlgebra bool, Applicative f) => b -> b -> b + (>||<) = liftA2 (>||<) + + -- | negation + notB :: b -> b + default notB + :: (b ~ f bool, BooleanAlgebra bool, Functor f) => b -> b + notB = fmap notB + + -- | inclusion + fromBool :: Bool -> b + default fromBool + :: (b ~ f bool, BooleanAlgebra bool, Applicative f) => Bool -> b + fromBool = pure . fromBool + +-- | cumulative conjunction +andB :: (Foldable f, BooleanAlgebra b) => f b -> b +andB = foldl' (>&&<) (fromBool True) + +-- | cumulative disjunction +orB :: (Foldable f, BooleanAlgebra b) => f b -> b +orB = foldl' (>||<) (fromBool False) + +-- | universal +allB :: (Foldable f, BooleanAlgebra b) => (a -> b) -> f a -> b +allB f = foldl' (\b a -> b >&&< f a) (fromBool True) + +-- | existential +anyB :: (Foldable f, BooleanAlgebra b) => (a -> b) -> f a -> b +anyB f = foldl' (\b a -> b >||< f a) (fromBool False) + +-- | `TokenTest` forms a closed `Tokenized` `BooleanAlgebra` +-- of `Categorized` `tokenClass`es. +newtype TokenTest token = TokenTest (RegExam token (TokenTest token)) + +-- | `TokenAlgebra` extends `Tokenized` methods to support +-- `BooleanAlgebra` operations in a `tokenClass` +class Tokenized token p => TokenAlgebra token p where + -- | Arguments of `tokenClass` can be constructed from + -- `Tokenized` and `BooleanAlgebra` methods. 
+ tokenClass :: TokenTest token -> p + default tokenClass + :: (p ~ q token token, Alternator q, Cochoice q) + => TokenTest token -> p + tokenClass (TokenTest exam) = case exam of + Fail -> empty + Pass -> anyToken + OneOf chars -> oneOf chars + NotOneOf chars (AsIn cat) -> + satisfy (notOneOf chars >&&< asIn cat) + NotOneOf chars (NotAsIn cats) -> + satisfy (notOneOf chars >&&< allB notAsIn cats) + Alternate exam1 exam2 -> tokenClass exam1 <|> tokenClass exam2 + +--instances +instance BooleanAlgebra (x -> Bool) +instance Categorized token => TokenAlgebra token (token -> Bool) where + tokenClass (TokenTest exam) = case exam of + Fail -> const False + Pass -> const True + OneOf chars -> oneOf chars + NotOneOf chars (AsIn cat) -> notOneOf chars >&&< asIn cat + NotOneOf chars (NotAsIn cats) -> notOneOf chars >&&< allB notAsIn cats + Alternate exam1 exam2 -> tokenClass exam1 >||< tokenClass exam2 +instance (Applicative f, BooleanAlgebra bool) + => BooleanAlgebra (Ap f bool) +deriving stock instance Generic (TokenTest token) +deriving stock instance + (Categorized token, Read token, Read (Categorize token)) + => Read (TokenTest token) +deriving stock instance + (Categorized token, Show token, Show (Categorize token)) + => Show (TokenTest token) +instance BooleanAlgebra Bool where + fromBool = id + notB = not + (>&&<) = (&&) + (>||<) = (||) +deriving newtype instance Categorized token + => Eq (TokenTest token) +deriving newtype instance Categorized token + => Ord (TokenTest token) +deriving newtype instance Categorized token + => BooleanAlgebra (TokenTest token) +deriving newtype instance Categorized token + => Tokenized token (TokenTest token) +instance Categorized token + => TokenAlgebra token (RegEx token) where + tokenClass (TokenTest tokenExam) = case tokenExam of + Fail -> RegExam Fail + Pass -> RegExam Pass + OneOf as -> RegExam (OneOf as) + NotOneOf as catTest -> RegExam (NotOneOf as catTest) + Alternate exam1 exam2 -> + RegExam (Alternate (tokenClass exam1) 
(tokenClass exam2)) +instance Categorized token + => BooleanAlgebra (RegExam token (TokenTest token)) where + fromBool = \case + False -> Fail + True -> Pass + notB Fail = Pass + notB Pass = Fail + notB (Alternate (TokenTest x) (TokenTest y)) = notB x >&&< notB y + notB (OneOf xs) = notOneOf xs + notB (NotOneOf xs (AsIn y)) = oneOf xs >||< notAsIn y + notB (NotOneOf xs (NotAsIn ys)) = oneOf xs >||< anyB asIn ys + _ >&&< Fail = Fail + Fail >&&< _ = Fail + x >&&< Pass = x + Pass >&&< y = y + x >&&< Alternate (TokenTest y) (TokenTest z) = (x >&&< y) >||< (x >&&< z) + Alternate (TokenTest x) (TokenTest y) >&&< z = (x >&&< z) >||< (y >&&< z) + OneOf xs >&&< OneOf ys = OneOf (Set.intersection xs ys) + OneOf xs >&&< NotOneOf ys (AsIn z) = OneOf + (Set.filter (\x -> categorize x == z) (Set.difference xs ys)) + NotOneOf xs (AsIn y) >&&< OneOf zs = OneOf + (Set.filter (\z -> categorize z == y) (Set.difference zs xs)) + OneOf xs >&&< NotOneOf ys (NotAsIn zs) = OneOf + (Set.filter (\x -> categorize x `notElem` zs) (Set.difference xs ys)) + NotOneOf xs (NotAsIn ys) >&&< OneOf zs = OneOf + (Set.filter (\z -> categorize z `notElem` ys) (Set.difference zs xs)) + NotOneOf xs (AsIn y) >&&< NotOneOf ws (AsIn z) = + if y /= z then Fail else NotOneOf + (Set.filter (\x -> categorize x == y) + (Set.union xs ws)) (AsIn y) + NotOneOf xs (AsIn y) >&&< NotOneOf ws (NotAsIn zs) = + if y `elem` zs then Fail else NotOneOf + (Set.filter (\x -> categorize x == y) + (Set.union xs ws)) (AsIn y) + NotOneOf xs (NotAsIn ys) >&&< NotOneOf ws (AsIn z) = + if z `elem` ys then Fail else NotOneOf + (Set.filter (\x -> categorize x == z) (Set.union xs ws)) + (AsIn z) + NotOneOf xs (NotAsIn ys) >&&< NotOneOf ws (NotAsIn zs) = + let + xws = Set.union xs ws + yzs = Set.union ys zs + in + NotOneOf + (Set.filter (\x -> categorize x `notElem` yzs) xws) + (NotAsIn yzs) + x >||< Fail = x + Fail >||< y = y + _ >||< Pass = Pass + Pass >||< _ = Pass + x >||< Alternate y z = Alternate (TokenTest x) (TokenTest (Alternate 
y z)) + Alternate x y >||< z = Alternate (TokenTest (Alternate x y)) (TokenTest z) + OneOf xs >||< OneOf ys = oneOf (Set.union xs ys) + OneOf xs >||< NotOneOf ys z = + Alternate (TokenTest (OneOf xs)) (TokenTest (NotOneOf ys z)) + NotOneOf xs y >||< OneOf zs = + Alternate (TokenTest (NotOneOf xs y)) (TokenTest (OneOf zs)) + NotOneOf xs (NotAsIn ys) >||< NotOneOf ws (NotAsIn zs) = + notOneOf (Set.intersection xs ws) >&&< allB notAsIn (Set.intersection ys zs) + NotOneOf xs (AsIn y) >||< NotOneOf ws (AsIn z) = + if y == z then NotOneOf (Set.intersection xs ws) (AsIn y) + else Alternate + (TokenTest (NotOneOf xs (AsIn y))) + (TokenTest (NotOneOf ws (AsIn z))) + NotOneOf xs (NotAsIn ys) >||< NotOneOf ws (AsIn z) = Alternate + (TokenTest (NotOneOf xs (NotAsIn ys))) + (TokenTest (NotOneOf ws (AsIn z))) + NotOneOf xs (AsIn y) >||< NotOneOf ws (NotAsIn zs) = Alternate + (TokenTest (NotOneOf xs (AsIn y))) + (TokenTest (NotOneOf ws (NotAsIn zs))) diff --git a/src/Control/Lens/Grammar/Kleene.hs b/src/Control/Lens/Grammar/Kleene.hs new file mode 100644 index 0000000..f13a1f5 --- /dev/null +++ b/src/Control/Lens/Grammar/Kleene.hs @@ -0,0 +1,253 @@ +{- | +Module : Control.Lens.Grammar.Kleene +Description : Kleene star algebras & regular expressions +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable + +Regular expressions form a Kleene star algebra. See Kleene, +[Representation of Events in Nerve Nets and Finite Automata] +(https://www.rand.org/pubs/research_memoranda/RM704.html) +-} + +module Control.Lens.Grammar.Kleene + ( -- * KleeneStarAlgebra + KleeneStarAlgebra (..) + , orK, anyK + -- * RegEx + , RegEx (..) + , RegExam (..) + , CategoryTest (..) 
+  ) where
+
+import Control.Applicative
+import Control.Lens.Grammar.Symbol
+import Control.Lens.Grammar.Token
+import Data.Foldable
+import Data.MemoTrie
+import Data.Monoid
+import Data.Profunctor
+import Data.Set (Set)
+import qualified Data.Set as Set
+import GHC.Generics
+
+{- | A `KleeneStarAlgebra` is an idempotent semiring
+with a generally non-commutative multiplication,
+the `Monoid` concatenation operator `<>` with identity `mempty`;
+and an idempotent addition, the alternation operator `>|<`
+with identity `zeroK`.
+
+It has three unary operators `optK`, `plusK` and the eponymous `starK`.
+
+prop> starK x = optK (plusK x)
+prop> plusK x = x <> starK x
+prop> optK x = mempty >|< x
+
+-}
+class Monoid k => KleeneStarAlgebra k where
+  starK, plusK, optK :: k -> k
+  starK x = optK (plusK x)
+  plusK x = x <> starK x
+  optK x = mempty >|< x
+  infixl 3 >|<
+  (>|<) :: k -> k -> k
+  zeroK :: k
+  default (>|<) :: (k ~ f a, Alternative f) => k -> k -> k
+  default zeroK :: (k ~ f a, Alternative f) => k
+  (>|<) = (<|>)
+  zeroK = empty
+
+-- | cumulative alternation
+orK :: (Foldable f, KleeneStarAlgebra k) => f k -> k
+orK = foldl' (>|<) zeroK
+
+-- | existential
+anyK :: (Foldable f, KleeneStarAlgebra k) => (a -> k) -> f a -> k
+anyK f = foldl' (\b a -> b >|< f a) zeroK
+
+-- | The `RegEx`pression type is the prototypical `KleeneStarAlgebra`.
+data RegEx token
+  = Terminal [token]
+  | NonTerminal String
+  | Sequence (RegEx token) (RegEx token)
+  | KleeneStar (RegEx token)
+  | KleeneOpt (RegEx token)
+  | KleenePlus (RegEx token)
+  | RegExam (RegExam token (RegEx token))
+
+{- | A component of both `RegEx`pressions
+and `Control.Lens.Grammar.Boole.TokenTest`s, so that the latter can
+be embedded in the former with `Control.Lens.Grammar.Boole.tokenClass`.
+-} +data RegExam token alg + = Fail + | Pass + | OneOf (Set token) + | NotOneOf (Set token) (CategoryTest token) + | Alternate alg alg + +{- | `CategoryTest`s for `Categorized` tokens.-} +data CategoryTest token + = AsIn (Categorize token) + | NotAsIn (Set (Categorize token)) + +--instances +instance (Alternative f, Monoid k) => KleeneStarAlgebra (Ap f k) +deriving stock instance Generic (RegEx token) +deriving stock instance Generic (RegExam token alg) +deriving stock instance Generic1 (RegExam token) +deriving stock instance Generic (CategoryTest token) +deriving stock instance Categorized token => Eq (RegEx token) +deriving stock instance Categorized token => Ord (RegEx token) +deriving stock instance + (Categorized token, Read token, Read (Categorize token)) + => Read (RegEx token) +deriving stock instance + (Categorized token, Show token, Show (Categorize token)) + => Show (RegEx token) +instance TerminalSymbol token (RegEx token) where + terminal = Terminal . toList +instance NonTerminalSymbol (RegEx token) where + nonTerminal = NonTerminal +instance Categorized token => Tokenized token (RegEx token) where + anyToken = RegExam Pass + token a = Terminal [a] + oneOf as | null as = RegExam Fail + oneOf as | length as == 1 = Terminal (toList as) + oneOf as = RegExam (OneOf (foldr Set.insert Set.empty as)) + notOneOf as | null as = RegExam Pass + notOneOf as = RegExam + (NotOneOf (foldr Set.insert Set.empty as) (NotAsIn Set.empty)) + asIn cat = RegExam (NotOneOf Set.empty (AsIn cat)) + notAsIn cat = RegExam + (NotOneOf Set.empty (NotAsIn (Set.singleton cat))) +instance Categorized token => Semigroup (RegEx token) where + Terminal [] <> rex = rex + rex <> Terminal [] = rex + RegExam Fail <> _ = zeroK + _ <> RegExam Fail = zeroK + Terminal str0 <> Terminal str1 = Terminal (str0 <> str1) + KleeneStar rex0 <> rex1 + | rex0 == rex1 = plusK rex0 + rex0 <> KleeneStar rex1 + | rex0 == rex1 = plusK rex1 + rex0 <> rex1 = Sequence rex0 rex1 +instance Categorized token => 
Monoid (RegEx token) where + mempty = Terminal [] +instance Categorized token => KleeneStarAlgebra (RegEx token) where + zeroK = RegExam Fail + optK (RegExam Fail) = mempty + optK (Terminal []) = mempty + optK (KleenePlus rex) = starK rex + optK rex = KleeneOpt rex + starK (RegExam Fail) = mempty + starK (Terminal []) = mempty + starK rex = KleeneStar rex + plusK (RegExam Fail) = zeroK + plusK (Terminal []) = mempty + plusK rex = KleenePlus rex + KleenePlus rex >|< Terminal [] = starK rex + Terminal [] >|< KleenePlus rex = starK rex + rex >|< Terminal [] = optK rex + Terminal [] >|< rex = optK rex + rex >|< RegExam Fail = rex + RegExam Fail >|< rex = rex + rex0 >|< rex1 | rex0 == rex1 = rex0 + rex0 >|< rex1 = RegExam (Alternate rex0 rex1) +instance Categorized token + => Tokenized token (RegExam token alg) where + anyToken = Pass + token a = OneOf (Set.singleton a) + oneOf as | null as = Fail + oneOf as = OneOf (Set.fromList (toList as)) + notOneOf as | null as = Pass + notOneOf as = + NotOneOf (Set.fromList (toList as)) (NotAsIn Set.empty) + asIn cat = NotOneOf Set.empty (AsIn cat) + notAsIn cat = + NotOneOf Set.empty (NotAsIn (Set.singleton cat)) +deriving stock instance + (Categorized token, Read token, Read alg, Read (Categorize token)) + => Read (RegExam token alg) +deriving stock instance + (Categorized token, Show token, Show alg, Show (Categorize token)) + => Show (RegExam token alg) +deriving stock instance Functor (RegExam token) +deriving stock instance Foldable (RegExam token) +deriving stock instance Traversable (RegExam token) +deriving stock instance (Categorized token, Eq alg) + => Eq (RegExam token alg) +deriving stock instance (Categorized token, Ord alg) + => Ord (RegExam token alg) +deriving stock instance Categorized token => Eq (CategoryTest token) +deriving stock instance Categorized token => Ord (CategoryTest token) +deriving stock instance + (Categorized token, Read token, Read (Categorize token)) + => Read (CategoryTest token) +deriving 
stock instance + (Categorized token, Show token, Show (Categorize token)) + => Show (CategoryTest token) +instance (Categorized token, HasTrie token) + => HasTrie (RegEx token) where + data (RegEx token :->: b) = RegExTrie + { terminalTrie :: [token] :->: b + , nonTerminalTrie :: String :->: b + , sequenceTrie :: (RegEx token, RegEx token) :->: b + , alternateTrie :: (RegEx token, RegEx token) :->: b + , kleeneStarTrie :: RegEx token :->: b + , kleeneOptTrie :: RegEx token :->: b + , kleenePlusTrie :: RegEx token :->: b + , failTrie :: b + , passTrie :: b + , oneOfTrie :: [token] :->: b + , notOneOfTrie :: ([token], Either Int [Int]) :->: b + } + trie f = RegExTrie + { terminalTrie = trie (f . terminal) + , nonTerminalTrie = trie (f . nonTerminal) + , sequenceTrie = trie (f . uncurry (<>)) + , alternateTrie = trie (f . uncurry (>|<)) + , kleeneStarTrie = trie (f . starK) + , kleeneOptTrie = trie (f . optK) + , kleenePlusTrie = trie (f . plusK) + , failTrie = f zeroK + , passTrie = f anyToken + , oneOfTrie = trie (f . oneOf) + , notOneOfTrie = trie (f . 
testNotOneOf) + } + untrie rex = \case + Terminal word -> untrie (terminalTrie rex) word + NonTerminal name -> untrie (nonTerminalTrie rex) name + Sequence x1 x2 -> untrie (sequenceTrie rex) (x1,x2) + KleeneStar x -> untrie (kleeneStarTrie rex) x + KleenePlus x -> untrie (kleenePlusTrie rex) x + KleeneOpt x -> untrie (kleeneOptTrie rex) x + RegExam Fail -> failTrie rex + RegExam Pass -> passTrie rex + RegExam (OneOf chars) -> untrie (oneOfTrie rex) (Set.toList chars) + RegExam (NotOneOf chars (AsIn cat)) -> + untrie (notOneOfTrie rex) (Set.toList chars, Left (fromEnum cat)) + RegExam (NotOneOf chars (NotAsIn cats)) -> + untrie (notOneOfTrie rex) + (Set.toList chars, Right (Set.toList (Set.map fromEnum cats))) + RegExam (Alternate x1 x2) -> untrie (alternateTrie rex) (x1,x2) + enumerate rex = mconcat + [ first' Terminal <$> enumerate (terminalTrie rex) + , first' NonTerminal <$> enumerate (nonTerminalTrie rex) + , first' (uncurry Sequence) <$> enumerate (sequenceTrie rex) + , first' (RegExam . uncurry Alternate) <$> enumerate (alternateTrie rex) + , first' KleeneStar <$> enumerate (kleeneStarTrie rex) + , first' KleeneOpt <$> enumerate (kleeneOptTrie rex) + , first' KleenePlus <$> enumerate (kleenePlusTrie rex) + , [(RegExam Fail, failTrie rex)] + , [(RegExam Pass, passTrie rex)] + , first' (RegExam . OneOf . Set.fromList) <$> enumerate (oneOfTrie rex) + , first' testNotOneOf <$> enumerate (notOneOfTrie rex) + ] +testNotOneOf + :: Categorized token + => ([token], Either Int [Int]) -> RegEx token +testNotOneOf (chars, catTest) = RegExam $ + NotOneOf (Set.fromList chars) (either (AsIn . toEnum) (NotAsIn . Set.map toEnum . 
Set.fromList) catTest) diff --git a/src/Control/Lens/Grammar/Symbol.hs b/src/Control/Lens/Grammar/Symbol.hs new file mode 100644 index 0000000..4f69e98 --- /dev/null +++ b/src/Control/Lens/Grammar/Symbol.hs @@ -0,0 +1,33 @@ +{- | +Module : Control.Lens.Grammar.Symbol +Description : terminal & nonterminal symbols +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable +-} + +module Control.Lens.Grammar.Symbol + ( -- * Symbol + TerminalSymbol (..) + , NonTerminalSymbol (..) + ) where + +import Control.Lens +import Control.Lens.PartialIso +import Control.Lens.Grammar.Token +import Data.Profunctor +import Data.Profunctor.Monoidal + +-- | A `terminal` symbol in a grammar. +class TerminalSymbol token s | s -> token where + terminal :: [token] -> s + default terminal + :: (p () () ~ s, Tokenized token (p token token), Monoidal p, Cochoice p) + => [token] -> s + terminal = foldr (\a p -> only a ?< anyToken *> p) oneP + +-- | A `nonTerminal` symbol in a grammar. +class NonTerminalSymbol s where + nonTerminal :: String -> s diff --git a/src/Control/Lens/Grammar/Token.hs b/src/Control/Lens/Grammar/Token.hs new file mode 100644 index 0000000..bb9d825 --- /dev/null +++ b/src/Control/Lens/Grammar/Token.hs @@ -0,0 +1,112 @@ +{- | +Module : Control.Lens.Grammar.Token +Description : lexical tokens +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable +-} + +module Control.Lens.Grammar.Token + ( -- * Tokenized + Tokenized (..) + , satisfy + , tokens + -- * Categorized + , Categorized (..) + , GeneralCategory (..) 
+ ) where + +import Control.Lens +import Control.Lens.PartialIso +import Data.Char +import Data.Profunctor +import Data.Profunctor.Monoidal +import Data.Word + +{- | `Categorized` provides a type family `Categorize` +and a function to `categorize` tokens into disjoint categories. + +>>> :kind! Categorize Char +Categorize Char :: * += GeneralCategory + +>>> categorize 'a' +LowercaseLetter +-} +class (Ord token, Ord (Categorize token), Enum (Categorize token)) + => Categorized token where + type Categorize token + type Categorize token = () + categorize :: token -> Categorize token + default categorize :: Categorize token ~ () => token -> Categorize token + categorize _ = () +instance Categorized Char where + type Categorize Char = GeneralCategory + categorize = generalCategory +instance Categorized Word8 +instance Categorized () + +{- | `Tokenized` combinators for constructing lexical tokens. -} +class Categorized token => Tokenized token p | p -> token where + {- | Any single token. -} + anyToken :: p + + {- | A single specified `token`. -} + token :: token -> p + default token + :: (p ~ q token token, Choice q, Cochoice q) + => token -> p + token = satisfy . token + + {- | A single token which is `oneOf` a set. -} + oneOf :: Foldable f => f token -> p + default oneOf + :: (p ~ q token token, Choice q, Cochoice q, Foldable f) + => f token -> p + oneOf = satisfy . oneOf + + {- | A single token which is `notOneOf` a set. -} + notOneOf :: Foldable f => f token -> p + default notOneOf + :: (p ~ q token token, Choice q, Cochoice q, Foldable f) + => f token -> p + notOneOf = satisfy . notOneOf + + {- | A single token which is `asIn` a category. -} + asIn :: Categorize token -> p + default asIn + :: (p ~ q token token, Choice q, Cochoice q) + => Categorize token -> p + asIn = satisfy . asIn + + {- | A single token which is `notAsIn` a category. 
-} + notAsIn :: Categorize token -> p + default notAsIn + :: (p ~ q token token, Choice q, Cochoice q) + => Categorize token -> p + notAsIn = satisfy . notAsIn + +instance Categorized token => Tokenized token (token -> Bool) where + anyToken _ = True + token = (==) + oneOf = flip elem + notOneOf = flip notElem + asIn = lmap categorize . (==) + notAsIn = lmap categorize . (/=) + +{- | A single token that satisfies a predicate. -} +satisfy + :: (Tokenized a (p a a), Choice p, Cochoice p) + => (a -> Bool) -> p a a +satisfy f = satisfied f >?< anyToken + +{- | A specified stream of `tokens`. -} +tokens + :: ( Foldable f, Tokenized a (p a a) + , Monoidal p, Choice p + , AsEmpty s, Cons s s a a + ) + => f a -> p s s +tokens = foldr ((>:<) . token) asEmpty diff --git a/src/Control/Lens/Grate.hs b/src/Control/Lens/Grate.hs index 7c9266f..9db64ae 100644 --- a/src/Control/Lens/Grate.hs +++ b/src/Control/Lens/Grate.hs @@ -1,7 +1,7 @@ {- | Module : Control.Lens.Grate Description : grates -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional @@ -35,7 +35,7 @@ import Data.Function import Data.Functor.Identity import Data.Functor.Rep import Data.Profunctor -import Data.Profunctor.Distributor +import Data.Profunctor.Monoidal {- | `Grate`s are an optic that are dual to `Control.Lens.Traversal.Traversal`s, as `Distributive` is `Traversable`. @@ -77,7 +77,7 @@ cloneGrate = grate . withGrate {- | Run `AGrate`. -} withGrate :: AGrate s t a b -> ((s -> a) -> b) -> t -withGrate grt = runGrating $ runIdentity <$> grt (Identity <$> anyToken) +withGrate grt = runGrating $ runIdentity <$> grt (Identity <$> Grating ($ id)) {- | Distribute over a `Closed` `Profunctor`. 
-} distributing @@ -108,8 +108,6 @@ instance Functor (Grating a b s) where fmap = fmapRep instance Applicative (Grating a b s) where pure = pureRep (<*>) = apRep -instance Tokenized a b (Grating a b) where - anyToken = Grating ($ id) instance Distributive (Grating a b s) where distribute = distributeRep collect = collectRep diff --git a/src/Control/Lens/Internal/NestedPrismTH.hs b/src/Control/Lens/Internal/NestedPrismTH.hs index 169787c..83e2520 100644 --- a/src/Control/Lens/Internal/NestedPrismTH.hs +++ b/src/Control/Lens/Internal/NestedPrismTH.hs @@ -1,7 +1,7 @@ {- | Module : Control.Lens.Internal.NestedPrismTH Description : nested pair prisms -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional @@ -12,7 +12,7 @@ with small tweaks to support nested pairs. -} module Control.Lens.Internal.NestedPrismTH - ( -- * Nested Prisms + ( -- * Nested prisms makeNestedPrisms ) where @@ -44,6 +44,7 @@ import Prelude -- is that constructors with @n > 2@ arguments -- will use right-nested pairs, rather than a flat @n@-tuple. -- This makes them suitable for use on the left-hand-side of +-- `Control.Lens.PartialIso.>~`, -- `Control.Lens.PartialIso.>?` and `Control.Lens.PartialIso.>?<`; -- with repeated use of `Data.Profunctor.Distributor.>*<` -- on the right-hand-side, resulting in right-nested pairs. 
diff --git a/src/Control/Lens/Monocle.hs b/src/Control/Lens/Monocle.hs index 211b7fd..c2d055e 100644 --- a/src/Control/Lens/Monocle.hs +++ b/src/Control/Lens/Monocle.hs @@ -1,7 +1,7 @@ {- | Module : Control.Lens.Monocle Description : monocles -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional @@ -29,7 +29,7 @@ module Control.Lens.Monocle import Control.Lens hiding (Traversing) import Control.Lens.Internal.Profunctor import Data.Distributive -import Data.Profunctor.Distributor +import Data.Profunctor.Monoidal {- | `Monocle`s are an optic that generalizes `Control.Lens.Traversal.Traversal`s & `Control.Lens.Grate.Grate`s. @@ -68,7 +68,7 @@ prop> traverse = ditraversed prop> cotraversed = ditraversed -} ditraversed :: (Traversable g, Distributive g) => Monocle (g a) (g b) a b -ditraversed = unwrapPafb . replicateP . WrapPafb +ditraversed = unwrapPafb . ditraverse . WrapPafb {- | Repeat action indefinitely. -} forevered :: Monocle s t () b @@ -76,14 +76,12 @@ forevered = unwrapPafb . foreverP . WrapPafb {- | Run `AMonocle` over an `Applicative`. -} withMonocle :: Applicative f => AMonocle s t a b -> ((s -> a) -> f b) -> f t -withMonocle mon = unMonocular (runIdentity <$> mon (Identity <$> anyToken)) +withMonocle mon = unMonocular (runIdentity <$> mon (Identity <$> Monocular ($ id))) {- | `Monocular` provides an efficient concrete representation of `Monocle`s. -} newtype Monocular a b s t = Monocular {unMonocular :: forall f. Applicative f => ((s -> a) -> f b) -> f t} -instance Tokenized a b (Monocular a b) where - anyToken = Monocular ($ id) instance Profunctor (Monocular a b) where dimap f g (Monocular k) = Monocular (fmap g . k . (. (. 
f))) diff --git a/src/Control/Lens/PartialIso.hs b/src/Control/Lens/PartialIso.hs index 08ff4e4..64c927a 100644 --- a/src/Control/Lens/PartialIso.hs +++ b/src/Control/Lens/PartialIso.hs @@ -1,7 +1,7 @@ {- | Module : Control.Lens.PartialIso Description : partial isomorphisms -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional @@ -20,41 +20,45 @@ module Control.Lens.PartialIso , PartialIso' , APartialIso , PartialExchange (PartialExchange) - -- Combinators + -- * Combinators , partialIso + , partialInvoluted , withPartialIso , clonePartialIso , coPartialIso , crossPartialIso , altPartialIso - -- * Actions + -- * Applicators , (>?) , (?<) , (>?<) - , mapIso + , (>~) + , (~<) , coPrism -- * Patterns , satisfied , nulled , notNulled - , streamed - , maybeEot - , listEot - -- * Iterations + , eotMaybe + , eotList + -- * Iterators , iterating , difoldl1 , difoldr1 , difoldl , difoldr - , difoldl' - , difoldr' -- * Template Haskell , makeNestedPrisms + -- * Re-exports + , module Control.Lens.Iso + , module Control.Lens.Prism ) where import Control.Lens import Control.Lens.Internal.NestedPrismTH import Control.Lens.Internal.Profunctor +import Control.Lens.Iso +import Control.Lens.Prism import Control.Monad import Data.Functor.Compose import Data.Profunctor @@ -145,13 +149,18 @@ partialIso :: (s -> Maybe a) -> (b -> Maybe t) -> PartialIso s t a b partialIso f g = unright . iso (maybe (Left ()) Right . f =<<) (mapMaybe g) . right' +{- | Given a function that is its own partial inverse, +this gives you a `PartialIso'` using it in both directions. -} +partialInvoluted :: (a -> Maybe a) -> PartialIso' a a +partialInvoluted f = partialIso f f + {- | Convert `APartialIso` to the pair of functions that characterize it. -} withPartialIso :: APartialIso s t a b -> ((s -> Maybe a) -> (b -> Maybe t) -> r) -> r -withPartialIso i k = - case i (PartialExchange Just (Just . 
Just)) of +withPartialIso pattern k = + case pattern (PartialExchange Just (Just . Just)) of PartialExchange f g -> k f (join . g) {- | Clone `APartialIso` so that you can reuse the same @@ -160,14 +169,14 @@ monomorphically typed partial isomorphism for different purposes. clonePartialIso :: APartialIso s t a b -> PartialIso s t a b -clonePartialIso i = withPartialIso i $ \f g -> partialIso f g +clonePartialIso pattern = withPartialIso pattern $ \f g -> partialIso f g {- | Clone and invert `APartialIso`. -} coPartialIso :: APartialIso b a t s -> PartialIso s t a b -coPartialIso i = - withPartialIso i $ \f g -> partialIso g f +coPartialIso pattern = + withPartialIso pattern $ \f g -> partialIso g f {- | Construct a `PartialIso` on pairs from components. -} crossPartialIso @@ -223,8 +232,14 @@ infixl 4 ?< infixl 4 >?< {- | Action of `AnIso` on `Profunctor`s. -} -mapIso :: Profunctor p => AnIso s t a b -> p a b -> p s t -mapIso i = withIso i dimap +(>~) :: Profunctor p => AnIso s t a b -> p a b -> p s t +(>~) pattern = withIso pattern dimap +infixl 2 >~ + +{- | Inverse action of `AnIso` on `Profunctor`s. -} +(~<) :: Profunctor p => AnIso b a t s -> p a b -> p s t +(~<) pattern = withIso pattern (flip dimap) +infixl 2 ~< {- | Action of a `coPrism` on the composition of a `Profunctor` and `Filterable`. @@ -235,7 +250,7 @@ coPrism p = unwrapPafb . (?<) p . WrapPafb {- | `satisfied` is the prototypical proper partial isomorphism, identifying a subset which satisfies a predicate. -} satisfied :: (a -> Bool) -> PartialIso' a a -satisfied f = partialIso satiate satiate where +satisfied f = partialInvoluted satiate where satiate a = if f a then Just a else Nothing {- | `nulled` matches an `Empty` pattern, like `_Empty`. 
-} @@ -249,46 +264,33 @@ notNulled :: (AsEmpty s, AsEmpty t) => PartialIso s t s t notNulled = partialIso nonEmp nonEmp where nonEmp s = if isn't _Empty s then Just s else Nothing -{- | `streamed` is an isomorphism between -two stream types with the same token type. -} -streamed - :: (AsEmpty s, AsEmpty t, Cons s s c c, Cons t t c c) - => Iso' s t -streamed = iso convertStream convertStream - where - convertStream s = - maybe - Empty - (\(h,t) -> cons h (convertStream t)) - (uncons s) - {- | The either-of-tuples representation of `Maybe`. -} -maybeEot :: Iso (Maybe a) (Maybe b) (Either () a) (Either () b) -maybeEot = iso +eotMaybe :: Iso (Maybe a) (Maybe b) (Either () a) (Either () b) +eotMaybe = iso (maybe (Left ()) Right) (either (pure Nothing) Just) {- | The either-of-tuples representation of list-like streams. -} -listEot +eotList :: (Cons s s a a, AsEmpty t, Cons t t b b) => Iso s t (Either () (a,s)) (Either () (b,t)) -listEot = iso +eotList = iso (maybe (Left ()) Right . uncons) (either (const Empty) (review _Cons)) {- | Iterate the application of a partial isomorphism, useful for constructing fold/unfold isomorphisms. -} iterating :: APartialIso a b a b -> Iso a b a b -iterating i = withPartialIso i $ \f g -> +iterating pattern = withPartialIso pattern $ \f g -> iso (iter f) (iter g) where iter h state = maybe state (iter h) (h state) {- | Left fold & unfold `APartialIso` to an `Control.Lens.Iso.Iso`. -} difoldl1 :: Cons s t a b - => APartialIso (c,a) (d,b) c d - -> Iso (c,s) (d,t) (c,s) (d,t) -difoldl1 i = + => APartialIso d c (d,b) (c,a) + -> Iso (d,t) (c,s) (d,t) (c,s) +difoldl1 pattern = let associate = iso (\(c,(a,s)) -> ((c,a),s)) @@ -296,15 +298,15 @@ difoldl1 i = step = crossPartialIso id _Cons . associate - . crossPartialIso i id - in iterating step + . crossPartialIso (coPartialIso pattern) id + in from (iterating step) {- | Right fold & unfold `APartialIso` to an `Control.Lens.Iso.Iso`. 
-} difoldr1 :: Cons s t a b - => APartialIso (a,c) (b,d) c d - -> Iso (s,c) (t,d) (s,c) (t,d) -difoldr1 i = + => APartialIso d c (b,d) (a,c) + -> Iso (t,d) (s,c) (t,d) (s,c) +difoldr1 pattern = let reorder = iso (\((a,s),c) -> (s,(a,c))) @@ -312,76 +314,26 @@ difoldr1 i = step = crossPartialIso _Cons id . reorder - . crossPartialIso id i - in iterating step + . crossPartialIso id (coPartialIso pattern) + in from (iterating step) -{- | Left fold & unfold `APartialIso` to a `PartialIso`. -} +{- | Left fold & unfold `APartialIso` to a `Control.Lens.Prism.Prism`. -} difoldl - :: (AsEmpty s, AsEmpty t, Cons s t a b) - => APartialIso (c,a) (d,b) c d - -> PartialIso (c,s) (d,t) c d -difoldl i = - let - unit' = iso - (\(a,()) -> a) - (\a -> (a,())) - in - difoldl1 i - . crossPartialIso id nulled - . unit' - -{- | Right fold & unfold `APartialIso` to a `PartialIso`. -} + :: (AsEmpty t, Cons s t a b) + => APartialIso d c (d,b) (c,a) + -> Prism d c (d,t) (c,s) +difoldl pattern + = dimap (,Empty) (fmap fst) + . difoldl1 pattern + +{- | Right fold & unfold `APartialIso` to a `Control.Lens.Prism.Prism`. -} difoldr - :: (AsEmpty s, AsEmpty t, Cons s t a b) - => APartialIso (a,c) (b,d) c d - -> PartialIso (s,c) (t,d) c d -difoldr i = - let - unit' = iso - (\((),c) -> c) - (\d -> ((),d)) - in - difoldr1 i - . crossPartialIso nulled id - . unit' - -{- | Left fold & unfold `Control.Lens.Prism.APrism'` -to a `Control.Lens.Prism.Prism'`. -} -difoldl' - :: (AsEmpty s, Cons s s a a) - => APrism' (c,a) c - -> Prism' (c,s) c -difoldl' i = - let - unit' = iso - (\(a,()) -> a) - (\a -> (a,())) - in - difoldl1 (clonePrism i) - . aside _Empty - . unit' - -{- | Right fold & unfold `Control.Lens.Prism.APrism'` -to a `Control.Lens.Prism.Prism'`. 
-} -difoldr' - :: (AsEmpty s, Cons s s a a) - => APrism' (a,c) c - -> Prism' (s,c) c -difoldr' i = - let - unit' = iso - (\((),c) -> c) - (\c -> ((),c)) - asideFst k = - withPrism k $ \bt seta -> - prism (first' bt) $ \(s,e) -> - case seta s of - Left t -> Left (t,e) - Right a -> Right (a,e) - in - difoldr1 (clonePrism i) - . asideFst _Empty - . unit' + :: (AsEmpty t, Cons s t a b) + => APartialIso d c (b,d) (a,c) + -> Prism d c (t,d) (s,c) +difoldr pattern + = dimap (Empty,) (fmap snd) + . difoldr1 pattern -- Orphanage -- diff --git a/src/Control/Lens/Wither.hs b/src/Control/Lens/Wither.hs index b58a559..72dd0be 100644 --- a/src/Control/Lens/Wither.hs +++ b/src/Control/Lens/Wither.hs @@ -1,7 +1,7 @@ {- | Module : Control.Lens.Wither Description : withers -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional diff --git a/src/Data/Profunctor/Distributor.hs b/src/Data/Profunctor/Distributor.hs index f6abdc3..1d2fd37 100644 --- a/src/Data/Profunctor/Distributor.hs +++ b/src/Data/Profunctor/Distributor.hs @@ -1,45 +1,44 @@ {-| Module : Data.Profunctor.Distributor Description : distributors -Copyright : (C) 2025 - Eitan Chatav +Copyright : (C) 2026 - Eitan Chatav License : BSD-style (see the file LICENSE) Maintainer : Eitan Chatav Stability : provisional Portability : non-portable -} -{-# OPTIONS_GHC -Wno-orphans #-} - module Data.Profunctor.Distributor - ( -- * Monoidal - Monoidal, oneP, (>*<), (>*), (*<), dimap2, foreverP, replicateP, meander, (>:<) - -- * Distributor - , Distributor (zeroP, (>+<), optionalP, manyP), dialt, Homogeneous (homogeneously) - -- * Alternator/Filtrator - , Alternator (alternate, someP), Filtrator (filtrate) + ( -- * Distributor + Distributor (..), dialt + -- * Alternator + , Alternator (..) + , choice + , option + -- * Homogeneous + , Homogeneous (..) 
-- * SepBy - , SepBy (..), sepBy, noSep, zeroOrMore, oneOrMore, chainl1, chainr1, chainl, chainr - -- * Tokenized - , Tokenized (anyToken), satisfy, token, tokens - -- * Printor/Parsor - , Printor (..), Parsor (..) + , SepBy (..) + , sepBy + , noSep + , several + , several1 + , chain + , chain1 + , intercalateP ) where import Control.Applicative hiding (WrappedArrow) import Control.Applicative qualified as Ap (WrappedArrow) import Control.Arrow import Control.Lens hiding (chosen) -import Control.Lens.Internal.Context -import Control.Lens.Internal.Iso -import Control.Lens.Internal.Prism import Control.Lens.Internal.Profunctor import Control.Lens.PartialIso -import Control.Monad import Data.Bifunctor.Clown import Data.Bifunctor.Joker import Data.Bifunctor.Product import Data.Complex -import Data.Distributive +import Data.Foldable hiding (toList) import Data.Functor.Adjunction import Data.Functor.Compose import Data.Functor.Contravariant.Divisible @@ -51,118 +50,16 @@ import Data.Profunctor qualified as Pro (WrappedArrow) import Data.Profunctor.Cayley import Data.Profunctor.Composition import Data.Profunctor.Monad +import Data.Profunctor.Monoidal import Data.Profunctor.Yoneda import Data.Proxy import Data.Sequence (Seq) -import Data.String import Data.Tagged import Data.Tree (Tree (..)) import Data.Vector (Vector) import Data.Void +import GHC.Exts import GHC.Generics -import Witherable - --- Monoidal -- - -{- | A lax `Monoidal` product `Profunctor` has unit `oneP` -and product `>*<` lax monoidal structure morphisms. -This is equivalent to the `Profunctor` also being `Applicative`. 
- -Laws: - ->>> let (f >< g) (a,c) = (f a, g c) ->>> let lunit = dimap (\((),a) -> a) (\a -> ((),a)) ->>> let runit = dimap (\(a,()) -> a) (\a -> (a,())) ->>> let assoc = dimap (\(a,(b,c)) -> ((a,b),c)) (\((a,b),c) -> (a,(b,c))) - -prop> dimap (f >< g) (h >< i) (p >*< q) = dimap f h p >*< dimap g i q -prop> oneP >*< p = lunit p -prop> p >*< oneP = runit p -prop> p >*< q >*< r = assoc ((p >*< q) >*< r) - --} -type Monoidal p = (Profunctor p, forall x. Applicative (p x)) - -{- | `oneP` is the unit of a `Monoidal` `Profunctor`. -} -oneP :: Monoidal p => p () () -oneP = pure () - -{- | `>*<` is the product of a `Monoidal` `Profunctor`. -} -(>*<) :: Monoidal p => p a b -> p c d -> p (a,c) (b,d) -(>*<) = dimap2 fst snd (,) -infixr 6 >*< - -{- | `>*` sequences actions, discarding the value of the first argument; -analagous to `*>`, extending it to `Monoidal`. - -prop> oneP >* p = p - --} -(>*) :: Monoidal p => p () c -> p a b -> p a b -x >* y = lmap (const ()) x *> y -infixl 5 >* - -{- | `*<` sequences actions, discarding the value of the second argument; -analagous to `<*`, extending it to `Monoidal`. - -prop> p *< oneP = p - --} -(*<) :: Monoidal p => p a b -> p () c -> p a b -x *< y = x <* lmap (const ()) y -infixl 5 *< - -{- | `dimap2` is a curried, functionalized form of `>*<`, -analagous to `liftA2`. -} -dimap2 - :: Monoidal p - => (s -> a) - -> (s -> c) - -> (b -> d -> t) - -> p a b -> p c d -> p s t -dimap2 f g h p q = liftA2 h (lmap f p) (lmap g q) - -{- | `foreverP` repeats an action indefinitely; -analagous to `forever`, extending it to `Monoidal`. -} -foreverP :: Monoidal p => p () c -> p a b -foreverP a = let a' = a >* a' in a' - -{- | Thanks to Fy on Monoidal Café Discord. - -`replicateP` is roughly analagous to `replicateM`, -repeating an action a number of times. -However, instead of an `Int` term, it expects -a `Traversable` & `Distributive` type. Such a -type is a homogeneous countable product. 
--} -replicateP - :: (Traversable t, Distributive t, Monoidal p) - => p a b -> p (t a) (t b) -replicateP p = traverse (\f -> lmap f p) (distribute id) - -{- | For any `Monoidal`, `Choice` & `Strong` `Profunctor`, -`meander` is invertible and gives a default implementation for the -`Data.Profunctor.Traversing.wander` -method of `Data.Profunctor.Traversing.Traversing`, -though `Strong` is not needed for its definition. - -See Pickering, Gibbons & Wu, -[Profunctor Optics - Modular Data Accessors](https://arxiv.org/abs/1703.10857) --} -meander - :: (Monoidal p, Choice p) - => ATraversal s t a b -> p a b -> p s t -meander f = dimap (f sell) iextract . trav - where - trav - :: (Monoidal q, Choice q) - => q u v -> q (Bazaar (->) u w x) (Bazaar (->) v w x) - trav q = mapIso funListEot $ right' (q >*< trav q) - -{- | A `Monoidal` `Cons` operator. -} -(>:<) :: (Monoidal p, Choice p, Cons s t a b) => p a b -> p s t -> p s t -x >:< xs = _Cons >? x >*< xs -infixr 5 >:< -- Distributor -- @@ -178,19 +75,18 @@ sum laws for `Distributor`. Laws: +>>> let lunit = dimap (either absurd id) Right +>>> let runit = dimap (either id absurd) Left >>> :{ -let f |+| g = either (Left . f) (Right . g) - lunit = dimap (either absurd id) Right - runit = dimap (either id absurd) Left - assoc = dimap +let assoc = dimap (either (Left . Left) (either (Left . Right) Right)) (either (either Left (Right . Left)) (Right . Right)) :} -prop> dimap (f |+| g) (h |+| i) (p >+< q) = dimap f h p >+< dimap g i q prop> zeroP >+< p = lunit p prop> p >+< zeroP = runit p prop> p >+< q >+< r = assoc ((p >+< q) >+< r) +prop> dimap (f >+< g) (h >+< i) (p >+< q) = dimap f h p >+< dimap g i q -} class Monoidal p => Distributor p where @@ -206,7 +102,7 @@ class Monoidal p => Distributor p where zeroP = empty {- | The sum structure morphism of a `Distributor`. - + `>+<` has a default for `Alternator`. 
prop> x >+< y = alternate (Left x) <|> alternate (Right y) @@ -220,11 +116,11 @@ class Monoidal p => Distributor p where {- | One or none. -} optionalP :: p a b -> p (Maybe a) (Maybe b) - optionalP p = mapIso maybeEot (oneP >+< p) + optionalP p = eotMaybe >~ oneP >+< p {- | Zero or more. -} manyP :: p a b -> p [a] [b] - manyP p = mapIso listEot (oneP >+< p >*< manyP p) + manyP p = eotList >~ oneP >+< p >*< manyP p instance Distributor (->) where zeroP = id @@ -315,11 +211,11 @@ class Traversable t => Homogeneous t where prop> homogeneously @Maybe = optionalP prop> homogeneously @[] = manyP - - Any `Traversable` & `Distributive` countable product + + Any `Traversable` & `Data.Distributive.Distributive` countable product can be given a default implementation for the `homogeneously` method. - prop> homogeneously = replicateP + prop> homogeneously = ditraverse And any user-defined homogeneous algebraic datatype has a default instance for `Homogeneous`, by deriving `Generic1`. @@ -407,15 +303,15 @@ instance Homogeneous Maybe where instance Homogeneous [] where homogeneously = manyP instance Homogeneous Vector where - homogeneously p = mapIso listEot (oneP >+< p >*< homogeneously p) + homogeneously p = eotList >~ oneP >+< p >*< homogeneously p instance Homogeneous Seq where - homogeneously p = mapIso listEot (oneP >+< p >*< homogeneously p) + homogeneously p = eotList >~ oneP >+< p >*< homogeneously p instance Homogeneous Complex where homogeneously p = dimap2 realPart imagPart (:+) p p instance Homogeneous Tree where homogeneously p = dimap2 rootLabel subForest Node p (manyP (homogeneously p)) --- Alternator/Filtrator -- +-- Alternator -- {- | The `Alternator` class co-extends `Choice` and `Distributor`, as well as `Alternative`, adding the `alternate` method, @@ -452,6 +348,14 @@ class (Choice p, Distributor p, forall x. Alternative (p x)) someP :: p a b -> p [a] [b] someP p = _Cons >? p >*< manyP p +-- | Combines all `Alternative` choices in the specified list. 
+choice :: (Foldable f, Alternative p) => f (p a) -> p a +choice = foldl' (<|>) empty + +-- | Perform an `Alternative` action or return a default value. +option :: Alternative p => a {- ^ default value -} -> p a -> p a +option a p = p <|> pure a + instance (Alternator p, Applicative f) => Alternator (WrappedPafb f p) where alternate = @@ -479,388 +383,77 @@ instance Alternator p => Alternator (Yoneda p) where alternate (Right p) = proreturn (alternate (Right (proextract p))) someP = proreturn . someP . proextract -{- | The `Filtrator` class extends `Cochoice`, -as well as `Filterable`, adding the `filtrate` method, -which is an oplax monoidal structure morphism dual to `>+<`. --} -class (Cochoice p, forall x. Filterable (p x)) - => Filtrator p where - - {- | - prop> unleft = fst . filtrate - prop> unright = snd . filtrate - - `filtrate` is a distant relative to `Data.Either.partitionEithers`. - - `filtrate` has a default for `Choice`. - -} - filtrate - :: p (Either a c) (Either b d) - -> (p a b, p c d) - default filtrate - :: Choice p - => p (Either a c) (Either b d) - -> (p a b, p c d) - filtrate = - dimapMaybe (Just . Left) (either Just (pure Nothing)) - &&& - dimapMaybe (Just . Right) (either (pure Nothing) Just) - -instance (Profunctor p, forall x. Functor (p x), Filterable f) - => Filtrator (WrappedPafb f p) where - filtrate (WrapPafb p) = - ( WrapPafb $ dimap Left (mapMaybe (either Just (const Nothing))) p - , WrapPafb $ dimap Right (mapMaybe (either (const Nothing) Just)) p - ) -instance Filtrator p => Filtrator (Coyoneda p) where - filtrate p = - let (q,r) = filtrate (proextract p) - in (proreturn q, proreturn r) -instance Filtrator p => Filtrator (Yoneda p) where - filtrate p = - let (q,r) = filtrate (proextract p) - in (proreturn q, proreturn r) -instance Filtrator (Forget r) where - filtrate (Forget f) = (Forget (f . Left), Forget (f . 
Right)) -instance (Filterable f, Traversable f) => Filtrator (Star f) where - filtrate (Star f) = - ( Star (mapMaybe (either Just (const Nothing)) . f . Left) - , Star (mapMaybe (either (const Nothing) Just) . f . Right) - ) -instance Filtrator (PartialExchange a b) where - filtrate (PartialExchange f g) = - ( PartialExchange (f . Left) (either Just (pure Nothing) <=< g) - , PartialExchange (f . Right) (either (pure Nothing) Just <=< g) - ) - --- SepBy -- - {- | Used to sequence multiple times, separated by a `separateBy`, begun by a `beginBy`, and ended by an `endBy`. -} data SepBy p = SepBy - { beginBy :: p () () - , endBy :: p () () - , separateBy :: p () () - } + { beginBy :: p + , endBy :: p + , separateBy :: p + } deriving stock + ( Functor, Foldable, Traversable + , Eq, Ord, Show, Read + ) {- | A `SepBy` smart constructor, setting the `separateBy` field, with no beginning or ending delimitors, except by updating `beginBy` or `endBy` fields. -} -sepBy :: Monoidal p => p () () -> SepBy p +sepBy :: Monoidal p => p () () -> SepBy (p () ()) sepBy = SepBy oneP oneP {- | A `SepBy` smart constructor for no separator, beginning or ending delimiters. -} -noSep :: Monoidal p => SepBy p +noSep :: Monoidal p => SepBy (p () ()) noSep = sepBy oneP {- | -prop> zeroOrMore noSep = manyP +prop> several noSep = manyP -} -zeroOrMore - :: Distributor p - => SepBy p -> p a b -> p [a] [b] -zeroOrMore sep p = mapIso listEot $ - beginBy sep >* oneP >+< p >*< manyP (separateBy sep >* p) *< endBy sep +several + :: (IsList s, IsList t, Distributor p) + => SepBy (p () ()) -> p (Item s) (Item t) -> p s t +several (SepBy beg end sep) p = iso toList fromList . eotList >~ + beg >* (oneP >+< p >*< manyP (sep >* p)) *< end {- | -prop> oneOrMore noSep = someP +prop> several1 noSep p = someP p -} -oneOrMore +several1 + :: (IsList s, IsList t, Distributor p, Choice p) + => SepBy (p () ()) -> p (Item s) (Item t) -> p s t +several1 (SepBy beg end sep) p = iso toList fromList . _Cons >? 
+ beg >* (p >*< manyP (sep >* p)) *< end + +{- | Use a nilary constructor pattern to sequence zero times, or +associate a binary constructor pattern to sequence one or more times. -} +chain :: Alternator p - => SepBy p -> p a b -> p [a] [b] -oneOrMore sep p = _Cons >? - beginBy sep >* p >*< manyP (separateBy sep >* p) *< endBy sep - -{- | -Left associate a binary constructor pattern to sequence one or more times. --} -chainl1 - :: (Choice p, Cochoice p, Distributor p) - => APartialIso a b (a,a) (b,b) -- ^ binary constructor pattern - -> SepBy p -> p a b -> p a b -chainl1 pat sep p = - coPartialIso (difoldl (coPartialIso pat)) >?< - beginBy sep >* p >*< manyP (separateBy sep >* p) *< endBy sep - -{- | -Right associate a binary constructor pattern to sequence one or more times. --} -chainr1 - :: (Choice p, Cochoice p, Distributor p) - => APartialIso a b (a,a) (b,b) -- ^ binary constructor pattern - -> SepBy p -> p a b -> p a b -chainr1 c2 sep p = - coPartialIso (difoldr (coPartialIso c2)) >?< - beginBy sep >* manyP (p *< separateBy sep) >*< p *< endBy sep - -{- | -Left associate a binary constructor pattern to sequence one or more times, -or use a nilary constructor pattern to sequence zero times. --} -chainl - :: (Alternator p, Filtrator p) - => APartialIso a b (a,a) (b,b) -- ^ binary constructor pattern - -> APartialIso a b () () -- ^ nilary constructor pattern - -> SepBy p -> p a b -> p a b -chainl c2 c0 sep p = - beginBy sep >* - (c0 >?< oneP <|> chainl1 c2 (sepBy (separateBy sep)) p) - *< endBy sep - -{- | -Right associate a binary constructor pattern to sequence one or more times, -or use a nilary constructor pattern to sequence zero times. 
--} -chainr - :: (Alternator p, Filtrator p) - => APartialIso a b (a,a) (b,b) -- ^ binary constructor pattern - -> APartialIso a b () () -- ^ nilary constructor pattern - -> SepBy p -> p a b -> p a b -chainr c2 c0 sep p = - beginBy sep >* - (c0 >?< oneP <|> chainr1 c2 (sepBy (separateBy sep)) p) - *< endBy sep - --- Tokenized -- - -{- | `Tokenized` serves two different purposes. -The `anyToken` method is used - -* by token-stream printer/parsers, to sequence a single token; -* and for concrete optics, as an identity morphism. - -In the former case the associated input and output token types -are same. In the latter case, observe that `Identical` is -a free `Tokenized`. --} -class Tokenized a b p | p -> a, p -> b where - anyToken :: p a b -instance Tokenized a b (Identical a b) where - anyToken = Identical -instance Tokenized a b (Exchange a b) where - anyToken = Exchange id id -instance Tokenized a b (Market a b) where - anyToken = Market id Right -instance Tokenized a b (PartialExchange a b) where - anyToken = PartialExchange Just Just -instance (Tokenized a b p, Profunctor p, Applicative f) - => Tokenized a b (WrappedPafb f p) where - anyToken = WrapPafb (rmap pure anyToken) - -{- | Sequences a single token that satisfies a predicate. -} -satisfy :: (Choice p, Cochoice p, Tokenized c c p) => (c -> Bool) -> p c c -satisfy f = satisfied f >?< anyToken - -{- | Sequences a single specified `token`. -} -token :: (Cochoice p, Eq c, Tokenized c c p) => c -> p () () -token c = only c ?< anyToken - -{- | Sequences a specified stream of `tokens`. -It can be used as a default definition for the `fromString` -method of `IsString` when `Tokenized` `Char` `Char`. --} -tokens :: (Cochoice p, Monoidal p, Eq c, Tokenized c c p) => [c] -> p () () -tokens [] = oneP -tokens (c:cs) = token c *> tokens cs - --- Printor/Parsor -- - -{- | A function from things to containers of -functions of strings to strings. 
-`Printor` is a degenerate `Profunctor` which -is constant in its covariant argument. --} -newtype Printor s f a b = Printor {runPrintor :: a -> f (s -> s)} - deriving Functor -instance Contravariant (Printor s f a) where - contramap _ (Printor p) = Printor p -instance Applicative f => Applicative (Printor s f a) where - pure _ = Printor (\_ -> pure id) - Printor p <*> Printor q = Printor (\a -> (.) <$> p a <*> q a) -instance Alternative f => Alternative (Printor s f a) where - empty = Printor (\_ -> empty) - Printor p <|> Printor q = Printor (\a -> p a <|> q a) -instance Filterable (Printor s f a) where - mapMaybe _ (Printor p) = Printor p -instance Profunctor (Printor s f) where - dimap f _ (Printor p) = Printor (p . f) -instance Alternative f => Choice (Printor s f) where - left' = alternate . Left - right' = alternate . Right -instance Cochoice (Printor s f) where - unleft = fst . filtrate - unright = snd . filtrate -instance Applicative f => Distributor (Printor s f) where - zeroP = Printor absurd - Printor p >+< Printor q = Printor (either p q) -instance Alternative f => Alternator (Printor s f) where - alternate = \case - Left (Printor p) -> Printor (either p (\_ -> empty)) - Right (Printor p) -> Printor (either (\_ -> empty) p) -instance Filtrator (Printor s f) where - filtrate (Printor p) = (Printor (p . Left), Printor (p . Right)) -instance (Applicative f, Cons s t a b, s ~ t, a ~ b) - => Tokenized a b (Printor s f) where - anyToken = Printor (pure . cons) -instance (Applicative f, Cons s s Char Char, a ~ (), b ~ ()) - => IsString (Printor s f a b) where - fromString = tokens - -{- | A function from strings to containers of -pairs of things and strings. -`Parsor` is a degenerate `Profunctor` which -is constant in its contravariant argument. 
--} -newtype Parsor s f a b = Parsor {runParsor :: s -> f (b,s)} - deriving Functor -instance Monad f => Applicative (Parsor s f a) where - pure b = Parsor (\str -> return (b,str)) - Parsor x <*> Parsor y = Parsor $ \str -> do - (f, str') <- x str - (a, str'') <- y str' - return (f a, str'') -instance Monad f => Monad (Parsor s f a) where - Parsor p >>= f = Parsor $ \s -> do - (a, s') <- p s - runParsor (f a) s' -instance (Alternative f, Monad f) => Alternative (Parsor s f a) where - empty = Parsor (\_ -> empty) - Parsor p <|> Parsor q = Parsor (\str -> p str <|> q str) -instance (Alternative f, Monad f) => MonadPlus (Parsor s f a) -instance Filterable f => Filterable (Parsor s f a) where - mapMaybe f (Parsor p) = Parsor (mapMaybe (\(a,str) -> (,str) <$> f a) . p) -instance Functor f => Bifunctor (Parsor s f) where - bimap _ g (Parsor p) = Parsor (fmap (\(c,str) -> (g c, str)) . p) -instance Functor f => Profunctor (Parsor s f) where - dimap _ g (Parsor p) = Parsor (fmap (\(c,str) -> (g c, str)) . p) -instance (Monad f, Alternative f) => Choice (Parsor s f) where - left' = alternate . Left - right' = alternate . Right -instance Filterable f => Cochoice (Parsor s f) where - unleft = fst . filtrate - unright = snd . filtrate -instance (Monad f, Alternative f) => Distributor (Parsor s f) -instance (Monad f, Alternative f) => Alternator (Parsor s f) where - alternate = \case - Left (Parsor p) -> Parsor (fmap (\(b, str) -> (Left b, str)) . p) - Right (Parsor p) -> Parsor (fmap (\(b, str) -> (Right b, str)) . p) -instance Filterable f => Filtrator (Parsor s f) where - filtrate (Parsor p) = - ( Parsor (mapMaybe leftMay . p) - , Parsor (mapMaybe rightMay . 
p) - ) where - leftMay (e, str) = either (\b -> Just (b, str)) (\_ -> Nothing) e - rightMay (e, str) = either (\_ -> Nothing) (\b -> Just (b, str)) e -instance (Alternative f, Cons s t a b, s ~ t, a ~ b) - => Tokenized a b (Parsor s f) where - anyToken = Parsor (\str -> maybe empty pure (uncons str)) -instance (Alternative f, Filterable f, Monad f, Cons s s Char Char, a ~ (), b ~ ()) - => IsString (Parsor s f a b) where - fromString = tokens - --- FunList -- - -{- | -`FunList` is isomorphic to `Bazaar` @(->)@. -It's needed to define `meander`. - -See van Laarhoven, A non-regular data type challenge -[https://twanvl.nl/blog/haskell/non-regular1] --} -data FunList a b t - = DoneFun t - | MoreFun a (Bazaar (->) a b (b -> t)) -instance Functor (FunList a b) where - fmap f = \case - DoneFun t -> DoneFun (f t) - MoreFun a h -> MoreFun a (fmap (f .) h) -instance Applicative (FunList a b) where - pure = DoneFun - (<*>) = \case - DoneFun t -> fmap t - MoreFun a h -> \l -> - MoreFun a (flip <$> h <*> fromFun l) -instance Sellable (->) FunList where sell b = MoreFun b (pure id) - -toFun :: Bazaar (->) a b t -> FunList a b t -toFun (Bazaar f) = f sell - -fromFun :: FunList a b t -> Bazaar (->) a b t -fromFun = \case - DoneFun t -> pure t - MoreFun a f -> ($) <$> f <*> sell a - -funListEot :: Iso - (Bazaar (->) a1 b1 t1) (Bazaar (->) a2 b2 t2) - (Either t1 (a1, Bazaar (->) a1 b1 (b1 -> t1))) - (Either t2 (a2, Bazaar (->) a2 b2 (b2 -> t2))) -funListEot = iso toFun fromFun . 
iso f g where - f = \case - DoneFun t -> Left t - MoreFun a baz -> Right (a, baz) - g = \case - Left t -> DoneFun t - Right (a, baz) -> MoreFun a baz - --- Orphanage -- - -instance Monoid r => Applicative (Forget r a) where - pure _ = Forget mempty - Forget f <*> Forget g = Forget (f <> g) -instance Decidable f => Applicative (Clown f a) where - pure _ = Clown conquer - Clown x <*> Clown y = Clown (divide (id &&& id) x y) -deriving newtype instance Applicative f => Applicative (Joker f a) -deriving via Compose (p a) f instance - (Profunctor p, Applicative (p a), Applicative f) - => Applicative (WrappedPafb f p a) -deriving via Compose (p a) f instance - (Profunctor p, Alternative (p a), Applicative f) - => Alternative (WrappedPafb f p a) -instance (Closed p, Distributive f) - => Closed (WrappedPafb f p) where - closed (WrapPafb p) = WrapPafb (rmap distribute (closed p)) -deriving via (Ap.WrappedArrow p a) instance Arrow p - => Functor (Pro.WrappedArrow p a) -deriving via (Ap.WrappedArrow p a) instance Arrow p - => Applicative (Pro.WrappedArrow p a) -deriving via (Pro.WrappedArrow p) instance Arrow p - => Profunctor (Ap.WrappedArrow p) -instance (Monoidal p, Applicative (q a)) - => Applicative (Procompose p q a) where - pure b = Procompose (pure b) (pure b) - Procompose wb aw <*> Procompose vb av = Procompose - (dimap2 fst snd ($) wb vb) - (liftA2 (,) aw av) -instance (Monoidal p, Monoidal q) - => Applicative (Product p q a) where - pure b = Pair (pure b) (pure b) - Pair x0 y0 <*> Pair x1 y1 = Pair (x0 <*> x1) (y0 <*> y1) -instance (Functor f, Functor (p a)) => Functor (Cayley f p a) where - fmap f (Cayley x) = Cayley (fmap (fmap f) x) -instance (Applicative f, Applicative (p a)) => Applicative (Cayley f p a) where - pure b = Cayley (pure (pure b)) - Cayley x <*> Cayley y = Cayley ((<*>) <$> x <*> y) -instance (Profunctor p, Applicative (p a)) - => Applicative (Yoneda p a) where - pure = proreturn . 
pure - ab <*> cd = proreturn (proextract ab <*> proextract cd) -instance (Profunctor p, Applicative (p a)) - => Applicative (Coyoneda p a) where - pure = proreturn . pure - ab <*> cd = proreturn (proextract ab <*> proextract cd) - -instance (Profunctor p, Alternative (p a)) - => Alternative (Yoneda p a) where - empty = proreturn empty - ab <|> cd = proreturn (proextract ab <|> proextract cd) - many = proreturn . many . proextract -instance (Profunctor p, Alternative (p a)) - => Alternative (Coyoneda p a) where - empty = proreturn empty - ab <|> cd = proreturn (proextract ab <|> proextract cd) - many = proreturn . many . proextract + => (forall x. x -> Either x x) -- ^ `Left` or `Right` associate + -> APartialIso a b (a,a) (b,b) -- ^ binary constructor pattern + -> APrism a b () () -- ^ nilary constructor pattern + -> SepBy (p () ()) -> p a b -> p a b +chain association pat2 pat0 (SepBy beg end sep) p = + beg >* (pat0 >? oneP <|> chain1 association pat2 (sepBy sep) p) *< end + +{- | Associate a binary constructor pattern to sequence one or more times. -} +chain1 + :: (Distributor p, Choice p) + => (forall x. x -> Either x x) -- ^ `Left` or `Right` associate + -> APartialIso a b (a,a) (b,b) -- ^ binary constructor pattern + -> SepBy (p () ()) -> p a b -> p a b +chain1 association pat (SepBy beg end sep) = leftOrRight chainl1 chainr1 + where + leftOrRight a b = case association () of Left _ -> a; Right _ -> b + chainl1 p = difoldl pat >? beg >* p >*< manyP (sep >* p) *< end + chainr1 p = difoldr pat >? beg >* manyP (p *< sep) >*< p *< end + +{- | `intercalateP` adds a `SepBy` to `replicateP`. 
-} +intercalateP + :: (Monoidal p, Choice p, AsEmpty s, AsEmpty t, Cons s t a b) + => Int -> SepBy (p () ()) -> p a b -> p s t +intercalateP n (SepBy beg end _) _ | n <= 0 = + beg >* lmap (const Empty) asEmpty *< end +intercalateP n (SepBy beg end comma) p = + beg >* p >:< replicateP (n-1) (comma >* p) *< end diff --git a/src/Data/Profunctor/Filtrator.hs b/src/Data/Profunctor/Filtrator.hs new file mode 100644 index 0000000..75a1e42 --- /dev/null +++ b/src/Data/Profunctor/Filtrator.hs @@ -0,0 +1,96 @@ +{-| +Module : Data.Profunctor.Filtrator +Description : filtrators +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable +-} + +module Data.Profunctor.Filtrator + ( -- * Filtrator + Filtrator (filtrate) + , mfiltrate + ) where + +import Control.Applicative +import Control.Arrow +import Control.Lens.PartialIso +import Control.Lens.Internal.Profunctor +import Control.Monad +import Data.Profunctor +import Data.Profunctor.Distributor +import Data.Profunctor.Monad +import Data.Profunctor.Monadic (Monadic) +import Data.Profunctor.Yoneda +import Witherable + +{- | The `Filtrator` class extends `Cochoice`, +as well as `Filterable`, adding the `filtrate` method, +which is an oplax monoidal structure morphism dual to `>+<`. + +prop> filtrate . uncurry (>+<) = id +prop> uncurry (>+<) . filtrate = id +-} +class (Cochoice p, forall x. Filterable (p x)) + => Filtrator p where + + {- | + prop> unleft = fst . filtrate + prop> unright = snd . filtrate + + `filtrate` is a distant relative to `Data.Either.partitionEithers`. + + `filtrate` has a default for `Choice`. + -} + filtrate + :: p (Either a c) (Either b d) + -> (p a b, p c d) + default filtrate + :: Choice p + => p (Either a c) (Either b d) + -> (p a b, p c d) + filtrate = + dimapMaybe (Just . Left) (either Just (const Nothing)) + &&& + dimapMaybe (Just . 
Right) (either (const Nothing) Just) + +-- | `mfiltrate` can be used as `filtrate`, for `Monadic` `Alternator`s. +-- +-- prop> mfiltrate = filtrate +mfiltrate + :: (Monadic p, Alternator p) + => p (Either a c) (Either b d) + -> (p a b, p c d) +mfiltrate = + (lmap Left >=> either pure (const empty)) + &&& + (lmap Right >=> either (const empty) pure) + +instance (Profunctor p, forall x. Functor (p x), Filterable f) + => Filtrator (WrappedPafb f p) where + filtrate (WrapPafb p) = + ( WrapPafb $ dimap Left (mapMaybe (either Just (const Nothing))) p + , WrapPafb $ dimap Right (mapMaybe (either (const Nothing) Just)) p + ) +instance Filtrator p => Filtrator (Coyoneda p) where + filtrate p = + let (q,r) = filtrate (proextract p) + in (proreturn q, proreturn r) +instance Filtrator p => Filtrator (Yoneda p) where + filtrate p = + let (q,r) = filtrate (proextract p) + in (proreturn q, proreturn r) +instance Filtrator (Forget r) where + filtrate (Forget f) = (Forget (f . Left), Forget (f . Right)) +instance (Filterable f, Traversable f) => Filtrator (Star f) where + filtrate (Star f) = + ( Star (mapMaybe (either Just (const Nothing)) . f . Left) + , Star (mapMaybe (either (const Nothing) Just) . f . Right) + ) +instance Filtrator (PartialExchange a b) where + filtrate (PartialExchange f g) = + ( PartialExchange (f . Left) (either Just (pure Nothing) <=< g) + , PartialExchange (f . Right) (either (pure Nothing) Just <=< g) + ) diff --git a/src/Data/Profunctor/Grammar.hs b/src/Data/Profunctor/Grammar.hs new file mode 100644 index 0000000..aca3466 --- /dev/null +++ b/src/Data/Profunctor/Grammar.hs @@ -0,0 +1,328 @@ +{-| +Module : Data.Profunctor.Grammar +Description : grammar distributors +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable +-} + +module Data.Profunctor.Grammar + ( -- * Parsor + Parsor (..) + , unparseP + , parseP + -- * Printor + , Printor (..) 
+ , printP + -- * Grammor + , Grammor (..) + ) where + +import Control.Applicative +import Control.Arrow +import Control.Category +import Control.Lens +import Control.Lens.Extras +import Control.Lens.Grammar.BackusNaur +import Control.Lens.Grammar.Boole +import Control.Lens.Grammar.Kleene +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token +import Control.Monad +import Data.Coerce +import Data.Monoid +import Data.Profunctor +import Data.Profunctor.Distributor +import Data.Profunctor.Filtrator +import Data.Profunctor.Monoidal +import Data.Void +import Prelude hiding (id, (.)) +import GHC.Exts +import Witherable + +-- | `Parsor` is a simple invertible parser `Profunctor`. +newtype Parsor s f a b = Parsor {runParsor :: Maybe a -> s -> f (b,s)} + +-- | Run the parser on an input string, +-- `uncons`ing tokens from the beginning of the string, +-- from left to right, returning a value and the remaining string. +parseP :: Parsor s f a b -> s -> f (b,s) +parseP (Parsor f) = f Nothing + +-- | Run the parser in reverse on a value and an input string; +-- `snoc`ing tokens at the end of the string, from left to right, +-- and returning the new string. +unparseP :: Functor f => Parsor s f a b -> a -> s -> f s +unparseP (Parsor f) a = fmap snd . f (Just a) + +-- | `Printor` is a simple printer `Profunctor`. +newtype Printor s f a b = Printor {runPrintor :: a -> f (b, s -> s)} + +-- | Run the printer on a value, returning a function +-- that `cons`es tokens at the beginning of an input string, +-- from right to left. +printP :: Functor f => Printor s f a b -> a -> f (s -> s) +printP (Printor f) = fmap snd . f + +-- | `Grammor` is a constant `Profunctor`. +newtype Grammor k a b = Grammor {runGrammor :: k} + +-- Parsor instances +deriving stock instance Functor f => Functor (Parsor s f a) +instance Functor f => Profunctor (Parsor s f) where + dimap f g = Parsor . dimap (fmap f) (fmap (fmap (first' g))) . 
runParsor +instance Monad m => Applicative (Parsor s m a) where + pure b = Parsor (\_ s -> pure (b,s)) + Parsor f <*> Parsor x = Parsor $ \ma s -> do + (g, s') <- f ma s + (a, s'') <- x ma s' + return (g a, s'') +instance (Alternative m, Monad m) => Strong (Parsor s m) where + first' p = p >*< id + second' p = id >*< p +instance Monad m => Monad (Parsor s m a) where + return = pure + Parsor p >>= f = Parsor $ \ma s -> do + (a, s') <- p ma s + runParsor (f a) ma s' +instance (Alternative m, Monad m) => Alternative (Parsor s m a) where + empty = Parsor (\_ _ -> empty) + Parsor p <|> Parsor q = Parsor $ \ma s -> p ma s <|> q ma s +instance (Alternative m, Monad m) => MonadPlus (Parsor s m a) +instance Filterable f => Filterable (Parsor s f a) where + mapMaybe f (Parsor p) = Parsor $ \fa s -> + mapMaybe (\(a,t) -> fmap (,t) (f a)) (p fa s) +instance Filterable f => Cochoice (Parsor s f) where + unleft = fst . filtrate + unright = snd . filtrate +instance Filterable f => Filtrator (Parsor s f) where + filtrate (Parsor p) = + ( Parsor $ \ma s -> mapMaybe + (\case{(Left b,t) -> Just (b,t); _ -> Nothing}) + (p (fmap Left ma) s) + , Parsor $ \ma s -> mapMaybe + (\case{(Right b,t) -> Just (b,t); _ -> Nothing}) + (p (fmap Right ma) s) + ) +instance (Alternative m, Monad m) => Distributor (Parsor s m) +instance (Alternative m, Monad m) => Choice (Parsor s m) where + left' = alternate . Left + right' = alternate . 
Right +instance (Alternative m, Monad m) => Alternator (Parsor s m) where + alternate = \case + Left (Parsor p) -> Parsor $ \ma s -> case ma of + Nothing -> fmap (first' Left) (p Nothing s) + Just (Left a) -> fmap (first' Left) (p (Just a) s) + Just (Right _) -> empty + Right (Parsor p) -> Parsor $ \ma s -> case ma of + Nothing -> fmap (first' Right) (p Nothing s) + Just (Right a) -> fmap (first' Right) (p (Just a) s) + Just (Left _) -> empty +instance (Alternative m, Monad m) => Category (Parsor s m) where + id = Parsor $ \ma s -> case ma of + Nothing -> empty + Just a -> pure (a,s) + Parsor q . Parsor p = Parsor $ \ma s -> case ma of + Nothing -> empty + Just a -> do + (b, t) <- p (Just a) s + q (Just b) t +instance (Alternative m, Monad m) => Arrow (Parsor s m) where + arr f = Parsor $ \ma s -> case ma of + Nothing -> empty + Just a -> pure (f a, s) + (***) = (>*<) + first = first' + second = second' +instance (Alternative m, Monad m) => ArrowZero (Parsor s m) where + zeroArrow = empty +instance (Alternative m, Monad m) => ArrowPlus (Parsor s m) where + (<+>) = (<|>) +instance (Alternative m, Monad m) => ArrowChoice (Parsor s m) where + (+++) = (>+<) + left = left' + right = right' +instance + ( Categorized a, a ~ Item s, IsList s + , Cons s s a a, Snoc s s a a + , Filterable m, Alternative m, Monad m + ) => Tokenized a (Parsor s m a a) where + anyToken = Parsor $ maybe + (maybe empty pure . uncons) + (\a -> pure . (a,) . 
flip snoc a) +instance + ( Categorized a, a ~ Item s, IsList s + , Cons s s a a, Snoc s s a a + , Filterable m, Alternative m, Monad m + ) => TokenAlgebra a (Parsor s m a a) +instance + ( Categorized a, a ~ Item s, IsList s + , Cons s s a a, Snoc s s a a + , Filterable m, Alternative m, Monad m + ) => TerminalSymbol a (Parsor s m () ()) where +instance + ( Char ~ Item s, IsList s + , Cons s s Char Char, Snoc s s Char Char + , Filterable m, Alternative m, Monad m + ) => IsString (Parsor s m () ()) where + fromString = terminal +instance + ( Char ~ Item s, IsList s + , Cons s s Char Char, Snoc s s Char Char, AsEmpty s + , Filterable m, Alternative m, Monad m + ) => IsString (Parsor s m s s) where + fromString = tokens +instance BackusNaurForm (Parsor s m a b) +instance (Alternative m, Monad m) => MonadFail (Parsor s m a) where + fail _ = empty +instance AsEmpty s => Matching s (Parsor s [] a b) where + word =~ p = case + [ () | (_, remaining) <- runParsor p Nothing word + , is _Empty remaining + ] of [] -> False; _:_ -> True + +-- Printor instances +instance Functor f => Functor (Printor s f a) where + fmap f = Printor . fmap (fmap (first' f)) . runPrintor +instance Functor f => Profunctor (Printor s f) where + dimap f g = Printor . dimap f (fmap (first' g)) . runPrintor +instance Applicative f => Applicative (Printor s f a) where + pure b = Printor (\_ -> pure (b, id)) + Printor f <*> Printor x = Printor $ \c -> + liftA2 (\(g, p) (a, q) -> (g a, p . q)) (f c) (x c) +instance Alternative f => Alternative (Printor s f a) where + empty = Printor (\_ -> empty) + Printor p <|> Printor q = Printor (\a -> p a <|> q a) +instance Filterable f => Filterable (Printor s f a) where + mapMaybe f (Printor p) = Printor $ + mapMaybe (\(a,q) -> fmap (, q) (f a)) . p +instance Monad f => Monad (Printor s f a) where + return = pure + Printor mx >>= f = Printor $ \a -> do + (a1,g) <- mx a + (b,h) <- runPrintor (f a1) a + return (b, g . 
h) +instance (Alternative f, Monad f) => MonadPlus (Printor s f a) +instance Applicative f => Distributor (Printor s f) where + zeroP = Printor absurd + Printor p >+< Printor q = Printor $ + either (fmap (first' Left) . p) (fmap (first' Right) . q) +instance Alternative f => Alternator (Printor s f) where + alternate = \case + Left (Printor p) -> Printor $ + either (fmap (first' Left) . p) (\_ -> empty) + Right (Printor p) -> Printor $ + either (\_ -> empty) (fmap (first' Right) . p) +instance Filterable f => Filtrator (Printor s f) where + filtrate (Printor p) = + let + leftMaybe = \case + (Left b, q) -> Just (b, q) + _ -> Nothing + rightMaybe = \case + (Right b, q) -> Just (b, q) + _ -> Nothing + in + ( Printor (mapMaybe leftMaybe . p . Left) + , Printor (mapMaybe rightMaybe . p . Right) + ) +instance Alternative f => Choice (Printor s f) where + left' = alternate . Left + right' = alternate . Right +instance Filterable f => Cochoice (Printor s f) where + unleft = fst . filtrate + unright = snd . filtrate +instance Functor f => Strong (Printor s f) where + first' (Printor p) = + Printor (\(a,c) -> fmap (\(b,q) -> ((b,c),q)) (p a)) + second' (Printor p) = + Printor (\(c,a) -> fmap (\(b,q) -> ((c,b),q)) (p a)) +instance Monad f => Category (Printor s f) where + id = Printor $ \a -> return (a, id) + Printor q . Printor p = Printor $ \a -> do + (b, p') <- p a + (c, q') <- q b + return (c, q' . p') +instance Monad f => Arrow (Printor s f) where + arr f = Printor (return . (, id) . 
f) + (***) = (>*<) + first = first' + second = second' +instance (Alternative f, Monad f) => ArrowZero (Printor s f) where + zeroArrow = empty +instance (Alternative f, Monad f) => ArrowPlus (Printor s f) where + (<+>) = (<|>) +instance (Alternative f, Monad f) => ArrowChoice (Printor s f) where + (+++) = (>+<) + left = left' + right = right' +instance + ( Categorized a, a ~ Item s, IsList s, Cons s s a a + , Filterable m, Alternative m, Monad m + ) => Tokenized a (Printor s m a a) where + anyToken = Printor (\b -> pure (b, cons b)) +instance + ( Categorized a, a ~ Item s, IsList s, Cons s s a a + , Filterable m, Alternative m, Monad m + ) => TokenAlgebra a (Printor s m a a) +instance + ( Categorized a, a ~ Item s, IsList s, Cons s s a a + , Filterable m, Alternative m, Monad m + ) => TerminalSymbol a (Printor s m () ()) where +instance + ( Char ~ Item s, IsList s, Cons s s Char Char + , Filterable m, Alternative m, Monad m + ) => IsString (Printor s m () ()) where + fromString = terminal +instance + ( Char ~ Item s, IsList s, Cons s s Char Char, AsEmpty s + , Filterable m, Alternative m, Monad m + ) => IsString (Printor s m s s) where + fromString = tokens +instance BackusNaurForm (Printor s m a b) +instance (Alternative m, Monad m) => MonadFail (Printor s m a) where + fail _ = empty + +-- Grammor instances +instance Functor (Grammor k a) where fmap _ = coerce +instance Contravariant (Grammor k a) where contramap _ = coerce +instance Profunctor (Grammor k) where dimap _ _ = coerce +instance Bifunctor (Grammor k) where bimap _ _ = coerce +instance Choice (Grammor k) where + left' = coerce + right' = coerce +instance Monoid k => Applicative (Grammor k a) where + pure _ = Grammor mempty + Grammor rex1 <*> Grammor rex2 = Grammor (rex1 <> rex2) +instance KleeneStarAlgebra k => Alternative (Grammor k a) where + empty = Grammor zeroK + Grammor rex1 <|> Grammor rex2 = Grammor (rex1 >|< rex2) + many (Grammor rex) = Grammor (starK rex) + some (Grammor rex) = Grammor (plusK 
rex) +instance KleeneStarAlgebra k => Distributor (Grammor k) where + zeroP = Grammor zeroK + Grammor rex1 >+< Grammor rex2 = Grammor (rex1 >|< rex2) + manyP (Grammor rex) = Grammor (starK rex) + optionalP (Grammor rex) = Grammor (optK rex) +instance KleeneStarAlgebra k => Alternator (Grammor k) where + alternate = either coerce coerce + someP (Grammor rex) = Grammor (plusK rex) +instance Tokenized token k => Tokenized token (Grammor k a b) where + anyToken = Grammor anyToken + token = Grammor . token + oneOf = Grammor . oneOf + notOneOf = Grammor . notOneOf + asIn = Grammor . asIn + notAsIn = Grammor . notAsIn +instance TokenAlgebra a k => TokenAlgebra a (Grammor k a b) where + tokenClass = Grammor . tokenClass +instance TerminalSymbol token k + => TerminalSymbol token (Grammor k a b) where + terminal = Grammor . terminal +instance BackusNaurForm k => BackusNaurForm (Grammor k a b) where + rule name = Grammor . rule name . runGrammor + ruleRec name = Grammor . ruleRec name . dimap Grammor runGrammor +instance Matching s k => Matching s (Grammor k a b) where + word =~ pattern = word =~ runGrammor pattern diff --git a/src/Data/Profunctor/Monadic.hs b/src/Data/Profunctor/Monadic.hs new file mode 100644 index 0000000..54d4a37 --- /dev/null +++ b/src/Data/Profunctor/Monadic.hs @@ -0,0 +1,52 @@ +{-| +Module : Data.Profunctor.Monadic +Description : monadic profunctors +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable + +See Li-yao Xia, [Monadic profunctors for bidirectional programming] +(https://blog.poisson.chat/posts/2017-01-01-monadic-profunctors.html) + +This module can provide qualified do-notation for `Monadic` profunctors. + +>>> :set -XQualifiedDo +>>> import qualified Data.Profunctor.Monadic as P + +See "Control.Lens.Grammar#t:CtxGrammar" for +an example of how to use "bonding" notation. 
+-} + +module Data.Profunctor.Monadic + ( -- * Monadic + Monadic + , (>>=) + , (>>) + , return + , fail + ) where + +import Data.Profunctor +import Prelude hiding ((>>=), (>>)) + +{- | A `Profunctor` which is also a `Monad`. -} +type Monadic p = (Profunctor p, forall x. Monad (p x)) + +{- | The pair bonding operator @P.@`>>=` is a context-sensitive +version of `Data.Profunctor.Monoidal.>*<`. + +prop> x >*< y = x P.>>= (\_ -> y) +-} +(>>=) :: Monadic p => p a b -> (b -> p c d) -> p (a,c) (b,d) +infixl 1 >>= +p >>= f = do + b <- lmap fst p + d <- lmap snd (f b) + return (b,d) + +{- | @P.@`>>` sequences actions. -} +(>>) :: Monadic p => p () c -> p a b -> p a b +infixl 1 >> +x >> y = do _ <- lmap (const ()) x; y diff --git a/src/Data/Profunctor/Monoidal.hs b/src/Data/Profunctor/Monoidal.hs new file mode 100644 index 0000000..b1aefa7 --- /dev/null +++ b/src/Data/Profunctor/Monoidal.hs @@ -0,0 +1,249 @@ +{-# OPTIONS_GHC -Wno-orphans #-} + +{-| +Module : Data.Profunctor.Monoidal +Description : monoidal profunctors +Copyright : (C) 2026 - Eitan Chatav +License : BSD-style (see the file LICENSE) +Maintainer : Eitan Chatav +Stability : provisional +Portability : non-portable +-} + +module Data.Profunctor.Monoidal + ( -- * Monoidal + Monoidal + , oneP, (>*<), (>*), (*<) + , dimap2, foreverP, ditraverse + -- * Monoidal & Choice + , replicateP, (>:<), asEmpty + , meander, eotFunList + ) where + +import Control.Applicative hiding (WrappedArrow) +import Control.Applicative qualified as Ap (WrappedArrow) +import Control.Arrow +import Control.Lens hiding (chosen) +import Control.Lens.Internal.Context +import Control.Lens.Internal.Profunctor +import Control.Lens.PartialIso +import Data.Bifunctor.Clown +import Data.Bifunctor.Joker +import Data.Bifunctor.Product +import Data.Distributive +import Data.Functor.Compose +import Data.Functor.Contravariant.Divisible +import Data.Profunctor hiding (WrappedArrow) +import Data.Profunctor qualified as Pro (WrappedArrow) +import 
Data.Profunctor.Cayley
+import Data.Profunctor.Composition
+import Data.Profunctor.Monad
+import Data.Profunctor.Yoneda
+
+-- Monoidal --
+
+{- | A lax `Monoidal` product `Profunctor` has unit `oneP`
+and product `>*<` lax monoidal structure morphisms.
+This is equivalent to the `Profunctor` also being `Applicative`.
+
+Laws:
+
+>>> let lunit = dimap (\((),a) -> a) (\a -> ((),a))
+>>> let runit = dimap (\(a,()) -> a) (\a -> (a,()))
+>>> let assoc = dimap (\(a,(b,c)) -> ((a,b),c)) (\((a,b),c) -> (a,(b,c)))
+
+prop> oneP >*< p = lunit p
+prop> p >*< oneP = runit p
+prop> p >*< q >*< r = assoc ((p >*< q) >*< r)
+prop> dimap (f >*< g) (h >*< i) (p >*< q) = dimap f h p >*< dimap g i q
+
+-}
+type Monoidal p = (Profunctor p, forall x. Applicative (p x))
+
+{- | `oneP` is the unit of a `Monoidal` `Profunctor`. -}
+oneP :: Monoidal p => p () ()
+oneP = pure ()
+
+{- | `>*<` is the product of a `Monoidal` `Profunctor`. -}
+(>*<) :: Monoidal p => p a b -> p c d -> p (a,c) (b,d)
+(>*<) = dimap2 fst snd (,)
+infixr 5 >*<
+
+{- | `>*` sequences actions, discarding the value of the first argument;
+analogous to `*>`, extending it to `Monoidal`.
+
+prop> oneP >* p = p
+
+-}
+(>*) :: Monoidal p => p () c -> p a b -> p a b
+x >* y = lmap (const ()) x *> y
+infixl 6 >*
+
+{- | `*<` sequences actions, discarding the value of the second argument;
+analogous to `<*`, extending it to `Monoidal`.
+
+prop> p *< oneP = p
+
+-}
+(*<) :: Monoidal p => p a b -> p () c -> p a b
+x *< y = x <* lmap (const ()) y
+infixl 6 *<
+
+{- | `dimap2` is a curried, functionalized form of `>*<`,
+analogous to `liftA2`. -}
+dimap2
+  :: Monoidal p
+  => (s -> a)
+  -> (s -> c)
+  -> (b -> d -> t)
+  -> p a b -> p c d -> p s t
+dimap2 f g h p q = liftA2 h (lmap f p) (lmap g q)
+
+{- | `foreverP` repeats an action indefinitely;
+analogous to `Control.Monad.forever`, extending it to `Monoidal`.
-}
+foreverP :: Monoidal p => p () c -> p a b
+foreverP a = let a' = a >* a' in a'
+
+{- | A `Monoidal` & `Choice` nil operator. -}
+asEmpty :: (AsEmpty s, Monoidal p, Choice p) => p s s
+asEmpty = _Empty >? oneP
+
+{- | A `Monoidal` & `Choice` cons operator. -}
+(>:<) :: (Cons s t a b, Monoidal p, Choice p) => p a b -> p s t -> p s t
+x >:< xs = _Cons >? x >*< xs
+infixr 5 >:<
+
+{- | Thanks to Fy on Monoidal Café Discord.
+
+A `Traversable` & `Data.Distributive.Distributive` type
+is a homogeneous countable product.
+That means it is a static length container, so unlike `replicateP`,
+`ditraverse` does not need an `Int` argument.
+-}
+ditraverse
+  :: (Traversable t, Distributive t, Monoidal p)
+  => p a b -> p (t a) (t b)
+ditraverse p = traverse (\f -> lmap f p) (distribute id)
+
+{- | `replicateP` is analogous to `Control.Monad.replicateM`,
+for `Monoidal` & `Choice` `Profunctor`s. -}
+replicateP
+  :: (Monoidal p, Choice p, AsEmpty s, AsEmpty t, Cons s t a b)
+  => Int -> p a b -> p s t
+replicateP n _ | n <= 0 = lmap (const Empty) asEmpty
+replicateP n a = a >:< replicateP (n-1) a
+
+{- | For any `Monoidal`, `Choice` & `Strong` `Profunctor`,
+`meander` is invertible and gives a default implementation for the
+`Data.Profunctor.Traversing.wander`
+method of `Data.Profunctor.Traversing.Traversing`,
+though `Strong` is not needed for its definition.
+
+See Pickering, Gibbons & Wu,
+[Profunctor Optics - Modular Data Accessors](https://arxiv.org/abs/1703.10857)
+-}
+meander
+  :: (Monoidal p, Choice p)
+  => ATraversal s t a b -> p a b -> p s t
+meander f = dimap (f sell) iextract . meandering
+  where
+    meandering
+      :: (Monoidal q, Choice q)
+      => q u v -> q (Bazaar (->) u w x) (Bazaar (->) v w x)
+    meandering q = eotFunList >~ right' (q >*< meandering q)
+
+{- |
+`eotFunList` is used to define `meander`.
+See van Laarhoven, [A non-regular data type challenge]
+(https://twanvl.nl/blog/haskell/non-regular1),
+both post and comments, for details.
+-} +eotFunList :: Iso + (Bazaar (->) a1 b1 t1) (Bazaar (->) a2 b2 t2) + (Either t1 (a1, Bazaar (->) a1 b1 (b1 -> t1))) + (Either t2 (a2, Bazaar (->) a2 b2 (b2 -> t2))) +eotFunList = iso (f . toFun) (fromFun . g) where + f = \case + DoneFun t -> Left t + MoreFun a baz -> Right (a, baz) + g = \case + Left t -> DoneFun t + Right (a, baz) -> MoreFun a baz +data FunList a b t + = DoneFun t + | MoreFun a (Bazaar (->) a b (b -> t)) +toFun :: Bazaar (->) a b t -> FunList a b t +toFun (Bazaar f) = f sell +fromFun :: FunList a b t -> Bazaar (->) a b t +fromFun = \case + DoneFun t -> pure t + MoreFun a f -> ($) <$> f <*> sell a +instance Functor (FunList a b) where + fmap f = \case + DoneFun t -> DoneFun (f t) + MoreFun a h -> MoreFun a (fmap (f .) h) +instance Applicative (FunList a b) where + pure = DoneFun + (<*>) = \case + DoneFun t -> fmap t + MoreFun a h -> \l -> + MoreFun a (flip <$> h <*> fromFun l) +instance Sellable (->) FunList where sell b = MoreFun b (pure id) + +-- Orphanage -- + +instance Monoid r => Applicative (Forget r a) where + pure _ = Forget mempty + Forget f <*> Forget g = Forget (f <> g) +instance Decidable f => Applicative (Clown f a) where + pure _ = Clown conquer + Clown x <*> Clown y = Clown (divide (id &&& id) x y) +deriving newtype instance Applicative f => Applicative (Joker f a) +deriving via Compose (p a) f instance + (Profunctor p, Applicative (p a), Applicative f) + => Applicative (WrappedPafb f p a) +deriving via Compose (p a) f instance + (Profunctor p, Alternative (p a), Applicative f) + => Alternative (WrappedPafb f p a) +instance (Closed p, Distributive f) + => Closed (WrappedPafb f p) where + closed (WrapPafb p) = WrapPafb (rmap distribute (closed p)) +deriving via (Ap.WrappedArrow p a) instance Arrow p + => Functor (Pro.WrappedArrow p a) +deriving via (Ap.WrappedArrow p a) instance Arrow p + => Applicative (Pro.WrappedArrow p a) +deriving via (Pro.WrappedArrow p) instance Arrow p + => Profunctor (Ap.WrappedArrow p) +instance 
(Monoidal p, Applicative (q a)) + => Applicative (Procompose p q a) where + pure b = Procompose (pure b) (pure b) + Procompose wb aw <*> Procompose vb av = Procompose + (dimap2 fst snd ($) wb vb) + (liftA2 (,) aw av) +instance (Monoidal p, Monoidal q) + => Applicative (Product p q a) where + pure b = Pair (pure b) (pure b) + Pair x0 y0 <*> Pair x1 y1 = Pair (x0 <*> x1) (y0 <*> y1) +instance (Functor f, Functor (p a)) => Functor (Cayley f p a) where + fmap f (Cayley x) = Cayley (fmap (fmap f) x) +instance (Applicative f, Applicative (p a)) => Applicative (Cayley f p a) where + pure b = Cayley (pure (pure b)) + Cayley x <*> Cayley y = Cayley ((<*>) <$> x <*> y) +instance (Profunctor p, Applicative (p a)) + => Applicative (Yoneda p a) where + pure = proreturn . pure + ab <*> cd = proreturn (proextract ab <*> proextract cd) +instance (Profunctor p, Applicative (p a)) + => Applicative (Coyoneda p a) where + pure = proreturn . pure + ab <*> cd = proreturn (proextract ab <*> proextract cd) +instance (Profunctor p, Alternative (p a)) + => Alternative (Yoneda p a) where + empty = proreturn empty + ab <|> cd = proreturn (proextract ab <|> proextract cd) + many = proreturn . many . proextract +instance (Profunctor p, Alternative (p a)) + => Alternative (Coyoneda p a) where + empty = proreturn empty + ab <|> cd = proreturn (proextract ab <|> proextract cd) + many = proreturn . many . proextract diff --git a/src/Text/Grammar/Distributor.hs b/src/Text/Grammar/Distributor.hs deleted file mode 100644 index e5a5222..0000000 --- a/src/Text/Grammar/Distributor.hs +++ /dev/null @@ -1,521 +0,0 @@ -{-| -Module : Text.Grammar.Distributor -Description : grammars -Copyright : (C) 2025 - Eitan Chatav -License : BSD-style (see the file LICENSE) -Maintainer : Eitan Chatav -Stability : provisional -Portability : non-portable - -See Joachim Breitner, -[Showcasing Applicative] -(https://www.joachim-breitner.de/blog/710-Showcasing_Applicative) -for idea to unify grammars. 
--} - -module Text.Grammar.Distributor - ( -- * Grammar - Grammar, Grammarr, Grammatical (..) - -- * Generators - , genReadS - , readGrammar - , genShowS - , showGrammar - , genRegEx - , genGrammar - , printGrammar - -- * RegEx - , RegEx (..) - , regexNorm - , regexParse - , regexString - , regexGrammar - ) where - -import Control.Applicative -import Control.Lens -import Control.Lens.PartialIso -import Data.Char -import Data.Coerce -import Data.Foldable -import Data.Function -import Data.Profunctor -import Data.Profunctor.Distributor -import Data.Set (Set, insert) -import Data.String -import GHC.Generics -import Witherable - -{- | `Grammar` is a Backus-Naur form grammar, -extended by regular expressions, -embedded in Haskell, with combinators: - -* pattern matching `>?`, `>?<` -* alternation `<|>` -* sequencing `>*<`, `>*`, `*<` -* Kleene quantifiers `optionalP`, `manyP`, `someP` -* any character `anyToken` -* regular predicates `inClass`, `notInClass`, `inCategory`, `notInCategory` -* nonregular predicate `satisfy` -* terminal strings `tokens`, `fromString` and -XOverloadedStrings -* nonterminal rules `rule`, `ruleRec` -* and more. - -To see an example of a `Grammar`, look at the source of `regexGrammar`. --} -type Grammar a = forall p. Grammatical p => p a a - -{- | A `Grammarr` is just a function of `Grammar`s, -useful for expressing one in terms of another `Grammar`. -The arr is for arrow; and it should be pronounced like a pirate. --} -type Grammarr a b = forall p. Grammatical p => p a a -> p b b - -{- | One can create new generators from a `Grammar` by defining -instances of `Grammatical`. For instance, one could create -generators for Parsec style parsers, and use `rule` for -labeling of parse errors. - -A `Grammatical` `Profunctor` is a partial distributor, -being an `Alternator` & `Filtrator`. 
-It is also `Tokenized` with `Char` input & output tokens, -and `IsString` with the property: - -prop> fromString = tokens - -`Grammatical` has defaults for methods -`inClass`, `notInClass`, `inCategory`, `notInCategory` -in terms of `satisfy`; -and `rule` & `ruleRec` in terms of `id` & `fix`. --} -class - ( Alternator p - , Filtrator p - , Tokenized Char Char p - , forall t. t ~ p () () => IsString t - ) => Grammatical p where - - {- | Only characters which are in the given `String`.-} - inClass :: String -> p Char Char - inClass str = satisfy $ \ch -> elem ch str - - {- | Only characters which are not in the given `String`.-} - notInClass :: String -> p Char Char - notInClass str = satisfy $ \ch -> notElem ch str - - {- | Only characters which are in the given `GeneralCategory`.-} - inCategory :: GeneralCategory -> p Char Char - inCategory cat = satisfy $ \ch -> cat == generalCategory ch - - {- | Only characters which are not in the given `GeneralCategory`.-} - notInCategory :: GeneralCategory -> p Char Char - notInCategory cat = satisfy $ \ch -> cat /= generalCategory ch - - {- | A nonterminal rule. -} - rule :: String -> p a a -> p a a - rule _ = id - - {- | A recursive, nonterminal rule. -} - ruleRec :: String -> (p a a -> p a a) -> p a a - ruleRec name = rule name . fix - -instance (Alternative f, Cons s s Char Char) - => Grammatical (Printor s f) -instance (Monad f, Alternative f, Filterable f, Cons s s Char Char) - => Grammatical (Parsor s f) - --- RegEx -- - -{- | A version of regular expressions extended by nonterminals. 
-} -data RegEx - = Terminal String -- ^ @abc123etc\\.@ - | Sequence RegEx RegEx -- ^ @xy@ - | Fail -- ^ @\\q@ - | Alternate RegEx RegEx -- ^ @x|y@ - | KleeneOpt RegEx -- ^ @x?@ - | KleeneStar RegEx -- ^ @x*@ - | KleenePlus RegEx -- ^ @x+@ - | AnyChar -- ^ @.@ - | InClass String -- ^ @[abc]@ - | NotInClass String -- ^ @[^abc]@ - | InCategory GeneralCategory -- ^ @\\p{Lu}@ - | NotInCategory GeneralCategory -- ^ @\\P{Ll}@ - | NonTerminal String -- ^ @\\q{rule-name}@ - deriving stock (Eq, Ord, Show, Generic) -makeNestedPrisms ''RegEx -makeNestedPrisms ''GeneralCategory - -(-*-), (|||) :: RegEx -> RegEx -> RegEx - -Terminal "" -*- rex = rex -rex -*- Terminal "" = rex -Fail -*- _ = Fail -_ -*- Fail = Fail -Terminal str0 -*- Terminal str1 = Terminal (str0 <> str1) -KleeneStar rex0 -*- rex1 | rex0 == rex1 = plusK rex0 -rex0 -*- KleeneStar rex1 | rex0 == rex1 = plusK rex0 -rex0 -*- rex1 = Sequence rex0 rex1 - -KleenePlus rex ||| Terminal "" = starK rex -Terminal "" ||| KleenePlus rex = starK rex -rex ||| Terminal "" = optK rex -Terminal "" ||| rex = optK rex -rex ||| Fail = rex -Fail ||| rex = rex -rex0 ||| rex1 | rex0 == rex1 = rex0 -rex0 ||| rex1 = Alternate rex0 rex1 - -optK, starK, plusK :: RegEx -> RegEx - -optK Fail = Terminal "" -optK (Terminal "") = Terminal "" -optK (KleenePlus rex) = starK rex -optK rex = KleeneOpt rex - -starK Fail = Terminal "" -starK (Terminal "") = Terminal "" -starK rex = KleeneStar rex - -plusK Fail = Fail -plusK (Terminal "") = Terminal "" -plusK rex = KleenePlus rex - -{- | Normalize a `RegEx`. - ->>> regexNorm (Sequence (Terminal "abc") (Terminal "xyz")) -Terminal "abcxyz" --} -regexNorm :: RegEx -> RegEx -regexNorm = \case - Sequence rex0 rex1 -> regexNorm rex0 -*- regexNorm rex1 - Alternate rex0 rex1 -> regexNorm rex0 ||| regexNorm rex1 - KleeneOpt rex -> optK (regexNorm rex) - KleeneStar rex -> starK (regexNorm rex) - KleenePlus rex -> plusK (regexNorm rex) - otherRegEx -> otherRegEx - -{- | Parse a `RegEx` from a `String`. 
- ->>> let str = "xy|z+" ->>> regexParse str -Alternate (Terminal "xy") (KleenePlus (Terminal "z")) - -`Fail` if the `String` is not a valid regular expression. - ->>> let bad = ")(" ->>> regexParse bad -Fail --} -regexParse :: String -> RegEx -regexParse str = case readGrammar regexGrammar str of - [] -> Fail - rex:_ -> regexNorm rex - -{- | The `RegEx` `String`. - ->>> let rex = Alternate (Terminal "xy") (KleenePlus (Terminal "z")) ->>> putStrLn (regexString rex) -xy|z+ --} -regexString :: RegEx -> String -regexString rex = maybe "\\q" id (showGrammar regexGrammar rex) - --- RegEx Generator -- - -newtype DiRegEx a b = DiRegEx RegEx -instance Functor (DiRegEx a) where fmap = rmap -instance Applicative (DiRegEx a) where - pure _ = DiRegEx (Terminal []) - DiRegEx rex1 <*> DiRegEx rex2 = DiRegEx (rex1 -*- rex2) -instance Alternative (DiRegEx a) where - empty = DiRegEx Fail - DiRegEx rex1 <|> DiRegEx rex2 = DiRegEx (rex1 ||| rex2) - many (DiRegEx rex) = DiRegEx (KleeneStar rex) - some (DiRegEx rex) = DiRegEx (KleenePlus rex) -instance Filterable (DiRegEx a) where - mapMaybe _ = coerce -instance Profunctor DiRegEx where - dimap _ _ = coerce -instance Distributor DiRegEx where - zeroP = DiRegEx Fail - DiRegEx rex1 >+< DiRegEx rex2 = DiRegEx (rex1 ||| rex2) - optionalP (DiRegEx rex) = DiRegEx (optK rex) - manyP (DiRegEx rex) = DiRegEx (starK rex) -instance Choice DiRegEx where - left' = coerce - right' = coerce -instance Cochoice DiRegEx where - unleft = coerce - unright = coerce -instance Alternator DiRegEx where - someP (DiRegEx rex) = DiRegEx (plusK rex) -instance Filtrator DiRegEx -instance IsString (DiRegEx () ()) where - fromString str = DiRegEx (Terminal str) -instance Tokenized Char Char DiRegEx where - anyToken = DiRegEx AnyChar -instance Grammatical DiRegEx where - inClass str = DiRegEx (InClass str) - notInClass str = DiRegEx (NotInClass str) - inCategory cat = DiRegEx (InCategory cat) - notInCategory cat = DiRegEx (NotInCategory cat) - --- Grammar Generator 
-- - -data DiGrammar a b = DiGrammar - { grammarStart :: DiRegEx a b - , grammarRules :: Set (String, RegEx) - } -instance Functor (DiGrammar a) where fmap = rmap -instance Applicative (DiGrammar a) where - pure b = DiGrammar (pure b) mempty - DiGrammar start1 rules1 <*> DiGrammar start2 rules2 = - DiGrammar (start1 <*> start2) (rules1 <> rules2) -instance Alternative (DiGrammar a) where - empty = DiGrammar empty mempty - DiGrammar start1 rules1 <|> DiGrammar start2 rules2 = - DiGrammar (start1 <|> start2) (rules1 <> rules2) - many (DiGrammar start rules) = DiGrammar (many start) rules - some (DiGrammar start rules) = DiGrammar (some start) rules -instance Filterable (DiGrammar a) where - mapMaybe f (DiGrammar start rules) = - DiGrammar (mapMaybe f start) rules -instance Profunctor DiGrammar where - dimap f g (DiGrammar start rules) = - DiGrammar (dimap f g start) rules -instance Distributor DiGrammar where - zeroP = DiGrammar zeroP mempty - DiGrammar start1 rules1 >+< DiGrammar start2 rules2 = - DiGrammar (start1 >+< start2) (rules1 <> rules2) - optionalP (DiGrammar start rules) = - DiGrammar (optionalP start) rules - manyP (DiGrammar start rules) = - DiGrammar (manyP start) rules -instance Choice DiGrammar where - left' = coerce - right' = coerce -instance Cochoice DiGrammar where - unleft = coerce - unright = coerce -instance Alternator DiGrammar where - someP (DiGrammar start rules) = - DiGrammar (someP start) rules -instance Filtrator DiGrammar -instance IsString (DiGrammar () ()) where - fromString str = DiGrammar (fromString str) mempty -instance Tokenized Char Char DiGrammar where - anyToken = DiGrammar anyToken mempty -instance Grammatical DiGrammar where - inClass str = DiGrammar (inClass str) mempty - notInClass str = DiGrammar (notInClass str) mempty - inCategory str = DiGrammar (inCategory str) mempty - rule name gram = - let - start = DiRegEx (NonTerminal name) - DiRegEx newRule = grammarStart gram - rules = insert (name, newRule) (grammarRules gram) 
- in - DiGrammar start rules - ruleRec name f = - let - start = DiRegEx (NonTerminal name) - gram = f (DiGrammar start mempty) - DiRegEx newRule = grammarStart gram - rules = insert (name, newRule) (grammarRules gram) - in - DiGrammar start rules - --- Generators -- - -{- | Generate a `ReadS` parser from a `Grammar`. -} -genReadS :: Grammar a -> ReadS a -genReadS = runParsor - -{- | Use a `Grammar` to parse a `String`. -} -readGrammar :: Grammar a -> String -> [a] -readGrammar grammar str = - [ a - | (a, remaining) <- genReadS grammar str - , remaining == [] - ] - -{- | Generate `ShowS` printers from a `Grammar`. -} -genShowS :: Alternative f => Grammar a -> a -> f ShowS -genShowS = runPrintor - -{- | Use a `Grammar` to print `String`s. -} -showGrammar :: Alternative f => Grammar a -> a -> f String -showGrammar grammar a = ($ "") <$> genShowS grammar a - -{- | Generate a `RegEx` from a `Grammar`. -This will infinite loop if your `Grammar` includes a `ruleRec`, -otherwise it will inline all rules and produce a regular expression. --} -genRegEx :: Grammar a -> RegEx -genRegEx (DiRegEx rex) = rex - -{- | Generate a context free grammar, -consisting of @"start"@ & named `RegEx` rules, from a `Grammar`. --} -genGrammar :: Grammar a -> [(String, RegEx)] -genGrammar (DiGrammar (DiRegEx start) rules) = - ("start", start) : toList rules - -{- | Print a `Grammar`.-} -printGrammar :: Grammar a -> IO () -printGrammar gram = for_ (genGrammar gram) $ \(name_i, rule_i) -> do - putStr name_i - putStr " = " - putStrLn (regexString rule_i) - --- RegEx Grammar -- - -{- | `regexGrammar` provides an important example of a `Grammar`. -Take a look at the source to see its definition. - ->>> printGrammar regexGrammar -start = \q{regex} -alternate = \q{sequence}(\|\q{sequence})* -any = \. 
-atom = \q{nonterminal}|\q{fail}|\q{class-in}|\q{class-not-in}|\q{category-in}|\q{category-not-in}|\q{char}|\q{any}|\q{parenthesized} -category = Ll|Lu|Lt|Lm|Lo|Mn|Mc|Me|Nd|Nl|No|Pc|Pd|Ps|Pe|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp|Cc|Cf|Cs|Co|Cn -category-in = \\p\{\q{category}\} -category-not-in = \\P\{\q{category}\} -char = \q{char-literal}|\q{char-escaped} -char-escaped = \\[\$\(\)\*\+\.\?\[\\\]\^\{\|\}] -char-literal = [^\$\(\)\*\+\.\?\[\\\]\^\{\|\}] -class-in = \[\q{char}*\] -class-not-in = \[\^\q{char}*\] -expression = \q{terminal}|\q{kleene-optional}|\q{kleene-star}|\q{kleene-plus}|\q{atom} -fail = \\q -kleene-optional = \q{atom}\? -kleene-plus = \q{atom}\+ -kleene-star = \q{atom}\* -nonterminal = \\q\{\q{char}*\} -parenthesized = \(\q{regex}\) -regex = \q{alternate} -sequence = \q{expression}* -terminal = \q{char}+ - --} -regexGrammar :: Grammar RegEx -regexGrammar = ruleRec "regex" $ \rex -> altG rex - -altG :: Grammarr RegEx RegEx -altG rex = rule "alternate" $ - chainl1 _Alternate (sepBy "|") (seqG rex) - -anyG :: Grammar RegEx -anyG = rule "any" $ _AnyChar >?< "." 
- -atomG :: Grammarr RegEx RegEx -atomG rex = rule "atom" $ - nonterminalG - <|> failG - <|> classInG - <|> classNotInG - <|> categoryInG - <|> categoryNotInG - <|> _Terminal >?< charG >:< pure "" - <|> anyG - <|> parenG rex - -categoryG :: Grammar GeneralCategory -categoryG = rule "category" $ - _LowercaseLetter >?< "Ll" - <|> _UppercaseLetter >?< "Lu" - <|> _TitlecaseLetter >?< "Lt" - <|> _ModifierLetter >?< "Lm" - <|> _OtherLetter >?< "Lo" - <|> _NonSpacingMark >?< "Mn" - <|> _SpacingCombiningMark >?< "Mc" - <|> _EnclosingMark >?< "Me" - <|> _DecimalNumber >?< "Nd" - <|> _LetterNumber >?< "Nl" - <|> _OtherNumber >?< "No" - <|> _ConnectorPunctuation >?< "Pc" - <|> _DashPunctuation >?< "Pd" - <|> _OpenPunctuation >?< "Ps" - <|> _ClosePunctuation >?< "Pe" - <|> _InitialQuote >?< "Pi" - <|> _FinalQuote >?< "Pf" - <|> _OtherPunctuation >?< "Po" - <|> _MathSymbol >?< "Sm" - <|> _CurrencySymbol >?< "Sc" - <|> _ModifierSymbol >?< "Sk" - <|> _OtherSymbol >?< "So" - <|> _Space >?< "Zs" - <|> _LineSeparator >?< "Zl" - <|> _ParagraphSeparator >?< "Zp" - <|> _Control >?< "Cc" - <|> _Format >?< "Cf" - <|> _Surrogate >?< "Cs" - <|> _PrivateUse >?< "Co" - <|> _NotAssigned >?< "Cn" - -categoryInG :: Grammar RegEx -categoryInG = rule "category-in" $ - _InCategory >?< "\\p{" >* categoryG *< "}" - -categoryNotInG :: Grammar RegEx -categoryNotInG = rule "category-not-in" $ - _NotInCategory >?< "\\P{" >* categoryG *< "}" - -charG :: Grammar Char -charG = rule "char" $ charLiteralG <|> charEscapedG - -charEscapedG :: Grammar Char -charEscapedG = rule "char-escaped" $ "\\" >* inClass charsReserved - -charLiteralG :: Grammar Char -charLiteralG = rule "char-literal" $ notInClass charsReserved - -charsReserved :: String -charsReserved = "$()*+.?[\\]^{|}" - -classInG :: Grammar RegEx -classInG = rule "class-in" $ - _InClass >?< "[" >* manyP charG *< "]" - -classNotInG :: Grammar RegEx -classNotInG = rule "class-not-in" $ - _NotInClass >?< "[^" >* manyP charG *< "]" - -exprG :: Grammarr 
RegEx RegEx -exprG rex = rule "expression" $ - terminalG - <|> kleeneOptG rex - <|> kleeneStarG rex - <|> kleenePlusG rex - <|> atomG rex - -failG :: Grammar RegEx -failG = rule "fail" $ _Fail >?< "\\q" - -nonterminalG :: Grammar RegEx -nonterminalG = rule "nonterminal" $ - _NonTerminal >?< "\\q{" >* manyP charG *< "}" - -parenG :: Grammarr a a -parenG rex = rule "parenthesized" $ - "(" >* rex *< ")" - -kleeneOptG :: Grammarr RegEx RegEx -kleeneOptG rex = rule "kleene-optional" $ - _KleeneOpt >?< atomG rex *< "?" - -kleeneStarG :: Grammarr RegEx RegEx -kleeneStarG rex = rule "kleene-star" $ - _KleeneStar >?< atomG rex *< "*" - -kleenePlusG :: Grammarr RegEx RegEx -kleenePlusG rex = rule "kleene-plus" $ - _KleenePlus >?< atomG rex *< "+" - -seqG :: Grammarr RegEx RegEx -seqG rex = rule "sequence" $ - chainl _Sequence (_Terminal . _Empty) noSep (exprG rex) - -terminalG :: Grammar RegEx -terminalG = rule "terminal" $ - _Terminal >?< someP charG diff --git a/test/Examples/Arithmetic.hs b/test/Examples/Arithmetic.hs new file mode 100644 index 0000000..4a40f2d --- /dev/null +++ b/test/Examples/Arithmetic.hs @@ -0,0 +1,45 @@ +module Examples.Arithmetic + ( Arith (..) 
+ , arithGrammar + , arithExamples + ) where + +import Control.Applicative +import Control.Lens +import Control.Lens.Grammar +import Control.Lens.Grammar.BackusNaur +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token +import Control.Lens.PartialIso +import Data.Profunctor.Distributor +import Data.Profunctor.Monoidal +import Numeric.Natural + +data Arith + = Num Natural + | Add Arith Arith + | Mul Arith Arith + deriving stock (Eq, Ord, Show, Read) + +makePrisms ''Arith + +arithGrammar :: Grammar Char Arith +arithGrammar = ruleRec "arith" sumG + where + sumG arith = rule "sum" $ + chain1 Left _Add (sepBy (terminal "+")) (prodG arith) + prodG arith = rule "product" $ + chain1 Left _Mul (sepBy (terminal "*")) (factorG arith) + factorG arith = rule "factor" $ + number <|> terminal "(" >* arith *< terminal ")" + number = rule "number" $ + _Num . iso show read >? someP (asIn @Char DecimalNumber) + +arithExamples :: [(Arith, String)] +arithExamples = + [ (Num 42, "42") + , (Add (Num 1) (Num 2), "1+2") + , (Add (Mul (Num 2) (Num 3)) (Num 4), "2*3+4") + , (Mul (Num 2) (Add (Num 3) (Num 4)), "2*(3+4)") + , (Add (Add (Num 1) (Mul (Num 2) (Num 3))) (Num 4), "1+2*3+4") + ] diff --git a/test/Examples/Json.hs b/test/Examples/Json.hs new file mode 100644 index 0000000..79cc5f9 --- /dev/null +++ b/test/Examples/Json.hs @@ -0,0 +1,136 @@ +module Examples.Json + ( Json (..) 
+ , jsonGrammar + , jsonExamples + ) where + +import Control.Applicative +import Control.Lens +import Control.Lens.Grammar +import Control.Lens.Grammar.BackusNaur +import Control.Lens.Grammar.Boole +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token +import Control.Lens.PartialIso +import Data.Profunctor.Distributor +import Data.Profunctor.Monoidal +import qualified Data.Map.Strict as Map +import Data.Map.Strict (Map) +import Numeric.Natural + +-- | Abstract syntax tree for JSON values +data Json + = JNull + | JBool Bool + | JNumber Natural -- simplified to only decimal natural numbers + | JString String + | JArray [Json] + | JObject (Map String Json) + deriving stock (Eq, Ord, Show, Read) + +-- Generate prisms +makePrisms ''Json + +-- | JSON grammar following the McKeeman Form specification from json.org +jsonGrammar :: Grammar Char Json +jsonGrammar = ruleRec "json" elementG + where + -- element = ws value ws + elementG json = rule "element" $ + ws >* valueG json *< ws + + -- value = object | array | string | number | "true" | "false" | "null" + valueG json = rule "value" $ choice + [ _JNull >? terminal "null" + , _JBool . only True >? terminal "true" + , _JBool . only False >? terminal "false" + , _JNumber >? numberG + , _JString >? stringG + , _JArray >? arrayG json + , _JObject >? objectG json + ] + + -- object = '{' ws '}' | '{' members '}' + objectG json = rule "object" $ choice + [ only Map.empty >? + terminal "{" >* ws >* terminal "}" + , iso Map.toList Map.fromList >~ + terminal "{" >* membersG json *< terminal "}" + ] + + -- members = member | member ',' members + membersG json = rule "members" $ + several1 (sepBy (terminal ",")) (memberG json) + + -- member = ws string ws ':' element + memberG json = rule "member" $ + ws >* stringG *< ws *< terminal ":" >*< elementG json + + -- array = '[' ws ']' | '[' elements ']' + arrayG json = rule "array" $ choice + [ only [] >? 
terminal "[" >* ws >* terminal "]" + , terminal "[" >* elementsG json *< terminal "]" + ] + + -- elements = element | element ',' elements + elementsG json = rule "elements" $ + several1 (sepBy (terminal ",")) (elementG json) + + -- string = '"' characters '"' + stringG = rule "string" $ + terminal "\"" >* manyP characterG *< terminal "\"" + + -- character = '0020' . '10FFFF' - '"' - '\' | '\' escape + characterG = rule "character" $ + tokenClass (oneOf ['\x0020' .. '\x10FFFF'] >&&< notOneOf ['\"','\\']) + <|> terminal "\\" >* escapeG + + -- escape = '"' | '\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' + escapeG = rule "escape" $ choice + [ only '"' >? terminal "\"" + , only '\\' >? terminal "\\" + , only '/' >? terminal "/" + , only '\b' >? terminal "b" + , only '\f' >? terminal "f" + , only '\n' >? terminal "n" + , only '\r' >? terminal "r" + , only '\t' >? terminal "t" + ] + + -- number = decimal natural number + numberG = rule "number" $ + iso show read >~ someP (asIn @Char DecimalNumber) + + -- Simplified: zero or more whitespace characters + ws = rule "ws" $ + iso (\() -> "") (\_ -> ()) >~ manyP (token @Char ' ') + +-- | Example JSON values for testing +jsonExamples :: [(Json, String)] +jsonExamples = + [ (JNull, "null") + , (JBool True, "true") + , (JBool False, "false") + , (JNumber 0, "0") + , (JNumber 42, "42") + , (JString "", "\"\"") + , (JString "hello", "\"hello\"") + , (JString "hello world", "\"hello world\"") + , (JString "\"quoted\"", "\"\\\"quoted\\\"\"") + , (JString "line1\nline2", "\"line1\\nline2\"") + , (JArray [], "[]") + , (JArray [JNumber 1, JNumber 2, JNumber 3], "[1,2,3]") + , (JArray [JBool True, JBool False], "[true,false]") + , (JObject Map.empty, "{}") + , (JObject (Map.fromList [("key", JString "value")]), "{\"key\":\"value\"}") + , (JObject (Map.fromList [("a", JNumber 1), ("b", JNumber 2)]), + "{\"a\":1,\"b\":2}") + , (JObject (Map.fromList + [ ("name", JString "Alice") + , ("age", JNumber 30) + , ("active", JBool True) + ]), 
"{\"active\":true,\"age\":30,\"name\":\"Alice\"}") + , (JArray [JObject (Map.fromList [("x", JNumber 1)]), + JObject (Map.fromList [("x", JNumber 2)])], + "[{\"x\":1},{\"x\":2}]") + ] diff --git a/test/Examples/Lambda.hs b/test/Examples/Lambda.hs new file mode 100644 index 0000000..bee41e3 --- /dev/null +++ b/test/Examples/Lambda.hs @@ -0,0 +1,107 @@ +module Examples.Lambda + ( Lambda (..) + , lambdaGrammar + , lambdaExamples + ) where + +import Control.Lens +import Control.Lens.Grammar +import Control.Lens.Grammar.BackusNaur +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token +import Control.Lens.PartialIso +import Data.Profunctor.Distributor +import Data.Profunctor.Monoidal + +-- | Abstract syntax tree for lambda calculus terms +data Lambda + = Var String -- ^ Variable + | Lam String Lambda -- ^ Lambda abstraction (\\x.body) + | App Lambda Lambda -- ^ Function application + deriving stock (Eq, Ord, Show, Read) + +-- Generate prisms +makePrisms ''Lambda + +-- | Grammar for untyped lambda calculus +lambdaGrammar :: Grammar Char Lambda +lambdaGrammar = ruleRec "lambda" termG + where + -- Top level term: lambda abstraction or application + termG term = rule "term" $ choice + [ lamG term + , appG term + ] + + -- Lambda abstraction: \x.body + lamG term = rule "lambda" $ + _Lam >? terminal "\\" >* varNameG *< terminal "." >*< term + + -- Application: left-associative chain of atoms + -- e.g., "f x y" parses as "(f x) y" + appG term = rule "application" $ + chain1 Left _App (sepBy (terminal " ")) (atomG term) + + -- Atomic term: variable or parenthesized term + atomG term = rule "atom" $ choice + [ _Var >? 
varNameG + , terminal "(" >* term *< terminal ")" + ] + + -- Variable name: starts with lowercase letter, + -- followed by alphanumeric or underscore + varNameG = rule "varname" $ asIn LowercaseLetter >:< + manyP (choice (token '_' : map asIn [LowercaseLetter, UppercaseLetter, DecimalNumber])) + +-- | Example lambda calculus terms for testing +lambdaExamples :: [(Lambda, String)] +lambdaExamples = + -- Variables + [ (Var "x", "x") + , (Var "y", "y") + , (Var "foo", "foo") + , (Var "x1", "x1") + + -- Simple lambda abstractions + , (Lam "x" (Var "x"), "\\x.x") -- Identity + , (Lam "x" (Lam "y" (Var "x")), "\\x.\\y.x") -- K combinator + , (Lam "x" (Lam "y" (Var "y")), "\\x.\\y.y") -- K* combinator + + -- Applications + , (App (Var "f") (Var "x"), "f x") + , (App (App (Var "f") (Var "x")) (Var "y"), "f x y") + + -- Lambda with application in body + , (Lam "f" (Lam "x" (App (Var "f") (Var "x"))), + "\\f.\\x.f x") + + -- S combinator: \x.\y.\z.x z (y z) + , (Lam "x" (Lam "y" (Lam "z" + (App (App (Var "x") (Var "z")) + (App (Var "y") (Var "z"))))), + "\\x.\\y.\\z.x z (y z)") + + -- Omega combinator: (\x.x x)(\x.x x) + , (App (Lam "x" (App (Var "x") (Var "x"))) + (Lam "x" (App (Var "x") (Var "x"))), + "(\\x.x x) (\\x.x x)") + + -- Church numeral 0: \f.\x.x + , (Lam "f" (Lam "x" (Var "x")), + "\\f.\\x.x") + + -- Church numeral 1: \f.\x.f x + , (Lam "f" (Lam "x" (App (Var "f") (Var "x"))), + "\\f.\\x.f x") + + -- Church numeral 2: \f.\x.f (f x) + , (Lam "f" (Lam "x" + (App (Var "f") (App (Var "f") (Var "x")))), + "\\f.\\x.f (f x)") + + -- Y combinator: \f.(\x.f (x x)) (\x.f (x x)) + , (Lam "f" + (App (Lam "x" (App (Var "f") (App (Var "x") (Var "x")))) + (Lam "x" (App (Var "f") (App (Var "x") (Var "x"))))), + "\\f.(\\x.f (x x)) (\\x.f (x x))") + ] diff --git a/test/Examples/LenVec.hs b/test/Examples/LenVec.hs new file mode 100644 index 0000000..a37b608 --- /dev/null +++ b/test/Examples/LenVec.hs @@ -0,0 +1,33 @@ +module Examples.LenVec + ( LenVec + , lenvecGrammar + , 
lenvecExamples + ) where + +import Control.Lens.Grammar +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token +import Control.Lens.PartialIso +import Data.Profunctor.Distributor +import qualified Data.Profunctor.Monadic as P +import Numeric.Natural + +data LenVec = LenVec {length :: Natural, vector :: [Natural]} + deriving (Eq, Ord, Show, Read) + +makeNestedPrisms ''LenVec + +lenvecGrammar :: CtxGrammar Char LenVec +lenvecGrammar = _LenVec >? P.do + let + numberG = iso show read >~ someP (asIn @Char DecimalNumber) + vectorG n = intercalateP n (sepBy (terminal ",")) numberG + len <- numberG -- bonds to _LenVec + terminal ";" -- doesn't bond + vectorG (fromIntegral len) -- bonds to _LenVec + +lenvecExamples :: [(LenVec, String)] +lenvecExamples = + [ (LenVec 3 [1,2,3], "3;1,2,3") + , (LenVec 0 [], "0;") + ] diff --git a/test/Examples/RegString.hs b/test/Examples/RegString.hs new file mode 100644 index 0000000..3e6a23e --- /dev/null +++ b/test/Examples/RegString.hs @@ -0,0 +1,66 @@ +module Examples.RegString + ( regexExamples + ) where + +import Control.Lens.Grammar +import Control.Lens.Grammar.Boole +import Control.Lens.Grammar.Kleene +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token + +regexExamples :: [(RegString, String)] +regexExamples = + [ (terminal "abc123etc.", "abc123etc.") + , (terminal "x" <> terminal "y", "xy") + , (zeroK, "[]") + , (terminal "x" >|< terminal "y", "x|y") + , (optK (terminal "x"), "x?") + , (starK (terminal "x"), "x*") + , (plusK (terminal "x"), "x+") + , (anyToken, "[^]") + , (oneOf "abc", "[abc]") + , (notOneOf "abc", "[^abc]") + , (asIn UppercaseLetter, "\\p{Lu}") + , (notAsIn LowercaseLetter, "\\P{Ll}") + , (nonTerminal "rule-name", "\\q{rule-name}") + , (terminal "", "") + , (optK (terminal "abc"), "(abc)?") + , (optK (terminal "abc") <> nonTerminal "xyz", "(abc)?\\q{xyz}") + + -- Boolean OR (>||<) operations + , (tokenClass (oneOf "abc" >||< oneOf "xyz"), "[abcxyz]") + , (tokenClass (notOneOf 
"abc" >||< notOneOf "xyz"), "[^]") + , (tokenClass (oneOf "abc" >||< notOneOf "xyz"), "[abc]|[^xyz]") + , (tokenClass (notOneOf "abc" >||< oneOf "xyz"), "[^abc]|[xyz]") + , (tokenClass (asIn UppercaseLetter >||< asIn LowercaseLetter), "\\p{Lu}|\\p{Ll}") + , (tokenClass (notAsIn Control >||< notAsIn Space), "[^]") + , (tokenClass (oneOf "abc" >||< asIn DecimalNumber), "[abc]|\\p{Nd}") + , (tokenClass (notOneOf "xyz" >||< notAsIn UppercaseLetter), "[^]") + + -- Boolean AND (>&&<) operations + , (tokenClass (oneOf "abcdef" >&&< oneOf "def123"), "[def]") + , (tokenClass (notOneOf "abc" >&&< notOneOf "xyz"), "[^abcxyz]") + , (tokenClass (oneOf "abcd" >&&< notOneOf "cd"), "[ab]") + , (tokenClass (notOneOf "abc" >&&< asIn LowercaseLetter), "[^abc\\p{Ll}]") + , (tokenClass (notOneOf "abc" >&&< notAsIn Control), "[^abc\\P{Cc}]") + , (tokenClass (asIn UppercaseLetter >&&< notOneOf "XYZ"), "[^XYZ\\p{Lu}]") + , (tokenClass (notAsIn Control >&&< notAsIn Space), "\\P{Zs|Cc}") + , (tokenClass (oneOf "0123456789" >&&< asIn DecimalNumber), "[0123456789]") + + -- Boolean NOT (notB) operations + , (tokenClass (notB (oneOf "abc")), "[^abc]") + , (tokenClass (notB (notOneOf "abc")), "[abc]") + , (tokenClass (notB (oneOf "abc" >||< oneOf "xyz")), "[^abcxyz]") + , (tokenClass (notB (asIn UppercaseLetter)), "\\P{Lu}") + , (tokenClass (notB (notAsIn Control)), "\\p{Cc}") + , (tokenClass (notB (notOneOf "abc" >&&< asIn LowercaseLetter)), "[abc]|\\P{Ll}") + + -- fromBool operations + , (tokenClass (fromBool True), "[^]") + , (tokenClass (fromBool False), "[]") + + -- Complex combinations + , (tokenClass (notOneOf "abc" >&&< (asIn LowercaseLetter >||< asIn UppercaseLetter)), "[^abc\\p{Ll}]|\\p{Lu}") + , (tokenClass ((oneOf "123" >||< asIn DecimalNumber) >&&< notOneOf "789"), "[123]|[^789\\p{Nd}]") + , (tokenClass (notB (oneOf "abc" >||< asIn MathSymbol)), "[^abc\\P{Sm}]") + ] diff --git a/test/Examples/SExpr.hs b/test/Examples/SExpr.hs new file mode 100644 index 0000000..84ebe5b --- /dev/null 
+++ b/test/Examples/SExpr.hs @@ -0,0 +1,65 @@ +module Examples.SExpr + ( SExpr (..) + , sexprGrammar + , sexprExamples + ) where + +import Control.Lens hiding (List) +import Control.Lens.Grammar +import Control.Lens.Grammar.BackusNaur +import Control.Lens.Grammar.Boole +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token +import Control.Lens.PartialIso hiding (List) +import Data.Profunctor.Distributor +import Data.Profunctor.Monoidal + +-- | Abstract syntax tree for S-expressions +data SExpr + = Atom String -- ^ Atomic symbol + | List [SExpr] -- ^ List of S-expressions + deriving stock (Eq, Ord, Show, Read) + +-- Generate prisms +makePrisms ''SExpr + +-- | Grammar for S-expressions +sexprGrammar :: Grammar Char SExpr +sexprGrammar = ruleRec "sexpr" $ \sexpr -> choice + [ _Atom >? atomG + , _List >? listG sexpr + ] + where + -- Atom: one or more alphanumeric or symbol characters + atomG = rule "atom" $ someP (tokenClass atomChars) + + -- List: parenthesized sequence of S-expressions + -- Elements are separated by whitespace + listG sexpr = rule "list" $ + terminal "(" >* several (sepBy (terminal " ")) sexpr *< terminal ")" + + -- Characters allowed in atoms: letters, digits, and symbols + atomChars = + oneOf (['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_+-*/<>=!?") + +-- | Example S-expressions for testing +sexprExamples :: [(SExpr, String)] +sexprExamples = + [ (Atom "foo", "foo") + , (Atom "x", "x") + , (Atom "+", "+") + , (Atom "define", "define") + , (List [], "()") + , (List [Atom "foo"], "(foo)") + , (List [Atom "foo", Atom "bar"], "(foo bar)") + , (List [Atom "foo", List [Atom "bar", Atom "baz"]], + "(foo (bar baz))") + , (List [Atom "define", List [Atom "square", Atom "x"], + List [Atom "*", Atom "x", Atom "x"]], + "(define (square x) (* x x))") + , (List [Atom "+", Atom "1", Atom "2"], "(+ 1 2)") + , (List [Atom "if", Atom "test", + List [Atom "then-branch"], + List [Atom "else-branch"]], + "(if test (then-branch) (else-branch))") + ] diff 
--git a/test/Examples/SemVer.hs b/test/Examples/SemVer.hs new file mode 100644 index 0000000..ffc5792 --- /dev/null +++ b/test/Examples/SemVer.hs @@ -0,0 +1,97 @@ +module Examples.SemVer + ( SemVer (..) + , semverGrammar + , semverCtxGrammar + , semverExamples + ) where + +import Control.Applicative +import Control.Lens.Grammar +import Control.Lens.Grammar.Symbol +import Control.Lens.Grammar.Token +import Control.Lens.PartialIso +import Data.Profunctor.Distributor +import qualified Data.Profunctor.Monadic as P +import Data.Profunctor.Monoidal +import Numeric.Natural + +-- | Semantic version structure following semver.org specification +-- Format: <major>.<minor>.<patch>[-<prerelease>][+<buildmetadata>] +-- Example: 1.2.3-alpha.1+build.123 +data SemVer = SemVer + { semverMajor :: Natural -- e.g., 1 + , semverMinor :: Natural -- e.g., 2 + , semverPatch :: Natural -- e.g., 3 + , semverPreRelease :: [String] -- e.g., ["alpha", "1"], ["rc", "2"] + , semverBuildMetadata :: [String] -- e.g., ["build", "123"], ["20130313144700"] + } + deriving (Eq, Ord, Show, Read) + +makeNestedPrisms ''SemVer + +-- | Grammar for semantic versions following semver.org specification
-- Regular grammar: +-- semver = version ["-" prerelease] ["+" buildmetadata] +-- version = number "." number "." number +-- number = digit+ +-- prerelease = identifier ("." identifier)* +-- buildmetadata = identifier ("." identifier)* +-- identifier = [0-9A-Za-z-]+ +semverGrammar :: RegGrammar Char SemVer +semverGrammar = _SemVer + >? numberG + >*< terminal "." >* numberG + >*< terminal "." >* numberG + >*< option [] (terminal "-" >* identifiersG) + >*< option [] (terminal "+" >* identifiersG) + where + numberG = iso show read >~ someP (asIn @Char DecimalNumber) + identifiersG = several1 (sepBy (terminal ".")) (someP charG) + charG = asIn LowercaseLetter + <|> asIn UppercaseLetter + <|> asIn DecimalNumber + <|> token '-' + +-- Context-sensitive SemVer to test qualified-do +semverCtxGrammar :: CtxGrammar Char SemVer +semverCtxGrammar = _SemVer >? 
P.do + let + numberG = iso show read >~ someP (asIn @Char DecimalNumber) + identifiersG = several1 (sepBy (terminal ".")) (someP charG) + charG = asIn LowercaseLetter + <|> asIn UppercaseLetter + <|> asIn DecimalNumber + <|> token '-' + _ <- numberG + _ <- terminal "." >* numberG + _ <- terminal "." >* numberG + _ <- option [] (terminal "-" >* identifiersG) + option [] (terminal "+" >* identifiersG) + +semverExamples :: [(SemVer, String)] +semverExamples = + [ (SemVer 0 0 1 [] [], + "0.0.1") + , (SemVer 1 0 0 [] [], + "1.0.0") + , (SemVer 1 2 3 [] [], + "1.2.3") + , (SemVer 1 0 0 ["alpha"] [], + "1.0.0-alpha") + , (SemVer 1 0 0 ["alpha", "1"] [], + "1.0.0-alpha.1") + , (SemVer 1 0 0 ["0", "3", "7"] [], + "1.0.0-0.3.7") + , (SemVer 1 0 0 ["x", "7", "z", "92"] [], + "1.0.0-x.7.z.92") + , (SemVer 1 0 0 [] ["20130313144700"], + "1.0.0+20130313144700") + , (SemVer 1 0 0 [] ["exp", "sha", "5114f85"], + "1.0.0+exp.sha.5114f85") + , (SemVer 1 0 0 ["beta"] ["exp", "sha", "5114f85"], + "1.0.0-beta+exp.sha.5114f85") + , (SemVer 1 0 0 ["beta", "11"] ["exp", "sha", "5114f85"], + "1.0.0-beta.11+exp.sha.5114f85") + , (SemVer 2 1 5 ["rc", "1"] ["build", "123"], + "2.1.5-rc.1+build.123") + ] diff --git a/test/Main.hs b/test/Main.hs new file mode 100644 index 0000000..06bb306 --- /dev/null +++ b/test/Main.hs @@ -0,0 +1,92 @@ +module Main (main) where + +import Data.Foldable hiding (toList) +import Data.Maybe (listToMaybe) +import Control.Lens.Grammar +import Test.DocTest +import Test.Hspec + +import Examples.RegString +import Examples.Arithmetic +import Examples.Json +import Examples.SExpr +import Examples.Lambda +import Examples.LenVec +import Examples.SemVer + +main :: IO () +main = do + doctests + hspec $ do + testGrammar "regexGrammar" regexGrammar regexExamples + testGrammar "semverGrammar" semverGrammar semverExamples + testGrammar "semverCtxGrammar" semverCtxGrammar semverExamples + testGrammar "arithGrammar" arithGrammar arithExamples + testGrammar "jsonGrammar" jsonGrammar 
jsonExamples + testGrammar "sexprGrammar" sexprGrammar sexprExamples + testGrammar "lambdaGrammar" lambdaGrammar lambdaExamples + testGrammar "lenvecGrammar" lenvecGrammar lenvecExamples + +doctests :: IO () +doctests = do + let + modulePaths = + [ "src/Control/Lens/Grammar.hs" + , "src/Control/Lens/Grammar/Token.hs" + ] + languageExtensions = + [ "-XAllowAmbiguousTypes" + , "-XArrows" + , "-XConstraintKinds" + , "-XDataKinds" + , "-XDefaultSignatures" + , "-XDeriveFoldable" + , "-XDeriveFunctor" + , "-XDeriveTraversable" + , "-XDeriveGeneric" + , "-XDerivingStrategies" + , "-XDerivingVia" + , "-XEmptyCase" + , "-XFlexibleContexts" + , "-XFlexibleInstances" + , "-XFunctionalDependencies" + , "-XGADTs" + , "-XGeneralizedNewtypeDeriving" + , "-XImportQualifiedPost" + , "-XImpredicativeTypes" + , "-XInstanceSigs" + , "-XLambdaCase" + , "-XMagicHash" + , "-XMonoLocalBinds" + , "-XQuantifiedConstraints" + , "-XRankNTypes" + , "-XRecursiveDo" + , "-XScopedTypeVariables" + , "-XStandaloneDeriving" + , "-XStandaloneKindSignatures" + , "-XTemplateHaskell" + , "-XTupleSections" + , "-XTypeApplications" + , "-XTypeFamilies" + , "-XTypeOperators" + , "-XUndecidableInstances" + , "-XUndecidableSuperClasses" + ] + for_ modulePaths $ \modulePath -> do + putStr "Testing module documentation in " + putStrLn modulePath + doctest (modulePath : languageExtensions) + +testGrammar :: (Show a, Eq a) => String -> CtxGrammar Char a -> [(a, String)] -> Spec +testGrammar name grammar examples = + describe name $ + for_ examples $ \(expectedSyntax, expectedString) -> do + it ("should parse from " <> expectedString <> " correctly") $ do + let actualSyntax = [parsed | (parsed, "") <- parseG grammar expectedString] + listToMaybe actualSyntax `shouldBe` Just expectedSyntax + it ("should unparse to " <> expectedString <> " correctly") $ do + let actualString = unparseG grammar expectedSyntax "" + actualString `shouldBe` Just expectedString + it ("should print to " <> expectedString <> " 
correctly") $ do + let actualString = ($ "") <$> printG grammar expectedSyntax + actualString `shouldBe` Just expectedString diff --git a/test/Spec.hs b/test/Spec.hs deleted file mode 100644 index 311c6cf..0000000 --- a/test/Spec.hs +++ /dev/null @@ -1,64 +0,0 @@ -module Main (main) where - -import Data.Char -import Data.Foldable -import Data.List (nub) -import Text.Grammar.Distributor -import Test.Hspec - -expectedRegexGrammar :: [(String, RegEx)] -expectedRegexGrammar = - [("start",NonTerminal "regex") - ,("alternate",Sequence (NonTerminal "sequence") (KleeneStar (Sequence (Terminal "|") (NonTerminal "sequence")))) - ,("any",Terminal ".") - ,("atom",Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (NonTerminal "nonterminal") (NonTerminal "fail")) (NonTerminal "class-in")) (NonTerminal "class-not-in")) (NonTerminal "category-in")) (NonTerminal "category-not-in")) (NonTerminal "char")) (NonTerminal "any")) (NonTerminal "parenthesized")) - ,("category",Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Alternate (Terminal "Ll") (Terminal "Lu")) (Terminal "Lt")) (Terminal "Lm")) (Terminal "Lo")) (Terminal "Mn")) (Terminal "Mc")) (Terminal "Me")) (Terminal "Nd")) (Terminal "Nl")) (Terminal "No")) (Terminal "Pc")) (Terminal "Pd")) (Terminal "Ps")) (Terminal "Pe")) (Terminal "Pi")) (Terminal "Pf")) (Terminal "Po")) (Terminal "Sm")) (Terminal "Sc")) (Terminal "Sk")) (Terminal "So")) (Terminal "Zs")) (Terminal "Zl")) (Terminal "Zp")) (Terminal "Cc")) (Terminal "Cf")) (Terminal "Cs")) (Terminal "Co")) (Terminal "Cn")) - ,("category-in",Sequence (Sequence (Terminal "\\p{") (NonTerminal "category")) (Terminal "}")) - ,("category-not-in",Sequence (Sequence (Terminal "\\P{") (NonTerminal 
"category")) (Terminal "}")) - ,("char",Alternate (NonTerminal "char-literal") (NonTerminal "char-escaped")) - ,("char-escaped",Sequence (Terminal "\\") (InClass "$()*+.?[\\]^{|}")) - ,("char-literal",NotInClass "$()*+.?[\\]^{|}") - ,("class-in",Sequence (Sequence (Terminal "[") (KleeneStar (NonTerminal "char"))) (Terminal "]")) - ,("class-not-in",Sequence (Sequence (Terminal "[^") (KleeneStar (NonTerminal "char"))) (Terminal "]")) - ,("expression",Alternate (Alternate (Alternate (Alternate (NonTerminal "terminal") (NonTerminal "kleene-optional")) (NonTerminal "kleene-star")) (NonTerminal "kleene-plus")) (NonTerminal "atom")) - ,("fail",Terminal "\\q") - ,("kleene-optional",Sequence (NonTerminal "atom") (Terminal "?")) - ,("kleene-plus",Sequence (NonTerminal "atom") (Terminal "+")) - ,("kleene-star",Sequence (NonTerminal "atom") (Terminal "*")) - ,("nonterminal",Sequence (Sequence (Terminal "\\q{") (KleeneStar (NonTerminal "char"))) (Terminal "}")) - ,("parenthesized",Sequence (Sequence (Terminal "(") (NonTerminal "regex")) (Terminal ")")) - ,("regex",NonTerminal "alternate") - ,("sequence",KleeneStar (NonTerminal "expression")) - ,("terminal",KleenePlus (NonTerminal "char")) - ] - -regexExamples :: [(RegEx, String)] -regexExamples = - [ (Terminal "abc123etc.", "abc123etc\\.") - , (Sequence (Terminal "x") (Terminal "y"), "xy") - , (Fail, "\\q") - , (Alternate (Terminal "x") (Terminal "y"), "x|y") - , (KleeneOpt (Terminal "x"), "x?") - , (KleeneStar (Terminal "x"), "x*") - , (KleenePlus (Terminal "x"), "x+") - , (AnyChar, ".") - , (InClass "abc", "[abc]") - , (NotInClass "abc", "[^abc]") - , (InCategory UppercaseLetter, "\\p{Lu}") - , (NotInCategory LowercaseLetter, "\\P{Ll}") - , (NonTerminal "rule-name", "\\q{rule-name}") - , (Terminal "", "") - ] - -main :: IO () -main = hspec $ do - describe "regexGrammar" $ do - it "should generate a correct grammar" $ - genGrammar regexGrammar `shouldBe` expectedRegexGrammar - for_ regexExamples $ \(rex, str) -> do - it 
("should print " <> show rex <> " correctly") $ - showGrammar regexGrammar rex `shouldBe` Just str - it ("should parse " <> str <> " correctly") $ do - let parses = readGrammar regexGrammar str - parses `shouldSatisfy` elem rex - length (nub (map regexNorm parses)) `shouldBe` 1