From 25eea41631175c48bdf508f299b358e666561cd7 Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 12:33:49 +0200 Subject: [PATCH 1/9] make it compile --- compat/Data/ByteString/Unsafe.hs | 15 +++++++++++++++ lib/Scanner.hs | 13 +++++++++++++ lib/Scanner/Internal.hs | 2 ++ scanner.cabal | 1 + 4 files changed, 31 insertions(+) create mode 100644 compat/Data/ByteString/Unsafe.hs diff --git a/compat/Data/ByteString/Unsafe.hs b/compat/Data/ByteString/Unsafe.hs new file mode 100644 index 0000000..ab02960 --- /dev/null +++ b/compat/Data/ByteString/Unsafe.hs @@ -0,0 +1,15 @@ +{-# LANGUAGE PackageImports #-} +{-# LANGUAGE CPP #-} + +module Data.ByteString.Unsafe +( unsafeDrop +) where + +#ifdef __MHS__ +import Data.ByteString (ByteString) +import Data.ByteString qualified as ByteString +unsafeDrop :: Int -> ByteString -> ByteString +unsafeDrop = ByteString.drop +#else +import "bytestring" Data.ByteString.Unsafe +#endif diff --git a/lib/Scanner.hs b/lib/Scanner.hs index 7cfd18f..e72408e 100644 --- a/lib/Scanner.hs +++ b/lib/Scanner.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} -- | Fast not-backtracking incremental scanner for bytestrings -- @@ -13,7 +14,9 @@ module Scanner , Result (..) , scan , scanOnly +#ifndef __MHS__ , scanLazy +#endif , scanWith , anyWord8 , anyChar8 @@ -42,10 +45,18 @@ import Data.Word import qualified Data.Char as Char import Data.ByteString (ByteString) import qualified Data.ByteString as ByteString +#ifndef __MHS__ import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as Lazy.ByteString +#endif import Control.Monad + +#ifdef __MHS__ +unsafeChr :: Int -> Char +unsafeChr = Char.chr +#else import GHC.Base (unsafeChr) +#endif -- | Scan the complete input, without resupplying scanOnly :: Scanner a -> ByteString -> Either String a @@ -56,6 +67,7 @@ scanOnly s bs = go (scan s bs) Fail _ err -> Left err More more -> go (more ByteString.empty) +#ifndef __MHS__ -- | Scan lazy bytestring by resupplying scanner with chunks scanLazy :: Scanner a -> Lazy.ByteString -> Either String a scanLazy s lbs = go (scan s) (Lazy.ByteString.toChunks lbs) @@ -68,6 +80,7 @@ scanLazy s lbs = go (scan s) (Lazy.ByteString.toChunks lbs) Done _ r -> Right r Fail _ err -> Left err More more' -> go more' chunks' +#endif -- | Scan with the provided resupply action scanWith :: Monad m => m ByteString -> Scanner a -> ByteString -> m (Result a) diff --git a/lib/Scanner/Internal.hs b/lib/Scanner/Internal.hs index b2fb829..5afc419 100644 --- a/lib/Scanner/Internal.hs +++ b/lib/Scanner/Internal.hs @@ -87,6 +87,7 @@ anyWord8 = Scanner $ \bs next -> Just (c, bs') -> next bs' c _ -> More $ \bs' -> slowPath bs' next where + slowPath :: ByteString -> Next Word8 r -> Result r slowPath bs next = case ByteString.uncons bs of Just (c, bs') -> next bs' c @@ -174,6 +175,7 @@ lookAhead = Scanner $ \bs next -> Just (c, _) -> next bs (Just c) _ -> More $ \bs' -> slowPath bs' next where + slowPath :: ByteString -> Next (Maybe Word8) r -> Result r slowPath bs next = case ByteString.uncons bs of Just (c, _) -> next bs (Just c) diff --git a/scanner.cabal b/scanner.cabal index 6f17d15..8d677c1 100644 --- a/scanner.cabal +++ b/scanner.cabal @@ -23,6 +23,7 @@ library Scanner.Internal other-modules: Prelude Data.Either + Data.ByteString.Unsafe Scanner.OctetPredicates build-depends: base <5 , fail From 105d5bf82cb60576c3106ae77bdd35c0735c3fe9 Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 12:44:58 +0200 Subject: [PATCH 2/9] disable some tests, not the test suite compiles --- spec/spec.hs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/spec/spec.hs b/spec/spec.hs index 1786429..b151593 100644 --- a/spec/spec.hs +++ b/spec/spec.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} module Main ( main @@ -10,7 +11,9 @@ import Scanner import Prelude hiding (take, takeWhile) import Data.Either import qualified Data.ByteString as ByteString +#ifndef __MHS__ import qualified Data.ByteString.Lazy as Lazy.ByteString +#endif import Test.Hspec main :: IO () @@ -28,6 +31,7 @@ anyWord8Spec = describe "anyWord8" $ do let bs = ByteString.pack [42, 43] scanOnly anyWord8 bs `shouldBe` Right 42 +#ifndef __MHS__ it "should consume the current byte" $ do let bs = ByteString.pack [42, 43] scanOnly (anyWord8 *> anyWord8) bs `shouldBe` Right 43 @@ -45,6 +49,7 @@ anyWord8Spec = describe "anyWord8" $ do , ByteString.pack [43] ] scanLazy (anyWord8 *> anyWord8) bs `shouldBe` Right 43 +#endif it "should fail on end of input" $ do let bs = ByteString.empty @@ -56,12 +61,14 @@ stringSpec = describe "string" $ do let bs = "hello world" scanOnly (string "hello" *> anyWord8) bs `shouldBe` Right 32 +#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ "hel" , "lo" ] scanLazy (string "hello") bs `shouldBe` Right () +#endif it "should fail on wrong input" $ do let bs = "helo world" @@ -73,6 +80,7 @@ takeSpec = describe "take" $ do let bs = "hello world" scanOnly (take 5) bs `shouldBe` Right "hello" +#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ "he" @@ -91,6 +99,7 @@ takeSpec = describe "take" $ do , "l" ] scanLazy (take 5) bs' `shouldSatisfy` isLeft +#endif takeWhileSpec :: Spec takeWhileSpec = describe "takeWhile" $ do @@ -98,6 +107,7 @@ takeWhileSpec = describe "takeWhile" $ do let bs = "hello world" scanOnly (takeWhile (/= 32)) bs `shouldBe` Right "hello" +#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ "he" @@ -105,6 +115,7 @@ takeWhileSpec = describe "takeWhile" $ do , "lo world" ] scanLazy (takeWhile (/= 32)) bs `shouldBe` Right "hello" +#endif it "should return everything is predicate where becomes False" $ do let bs = "hello" @@ -124,12 +135,14 @@ lookAheadSpec = describe "lookAhead" $ do let bs = ByteString.pack [42, 43] scanOnly (lookAhead *> anyWord8) bs `shouldBe` Right 42 +#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ ByteString.pack [42] , ByteString.pack [43] ] scanLazy (anyWord8 *> lookAhead) bs `shouldBe` Right (Just 43) +#endif scanWithSpec :: Spec scanWithSpec = describe "scanWith" $ do From 086da3d5168ab280e29d87d59abfcc7f9d733dea Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 12:47:39 +0200 Subject: [PATCH 3/9] up --- lib/Scanner/Internal.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Scanner/Internal.hs b/lib/Scanner/Internal.hs index 5afc419..dd41490 100644 --- a/lib/Scanner/Internal.hs +++ b/lib/Scanner/Internal.hs @@ -66,11 +66,13 @@ instance Monad Scanner where run s1 bs $ \bs' a -> run (s2 a) bs' next +#ifndef __MHS__ #if !(MIN_VERSION_base(4,13,0)) {-# INLINE fail #-} fail err = Scanner $ \bs _ -> Fail bs err #endif +#endif instance MonadFail Scanner where {-# INLINE fail #-} From 5744c191494370d7b244fdcab80a63a2996e7e76 Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 13:04:44 +0200 Subject: [PATCH 4/9] missing file --- portable_hspec/Test/Hspec.hs | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 portable_hspec/Test/Hspec.hs diff --git a/portable_hspec/Test/Hspec.hs b/portable_hspec/Test/Hspec.hs new file mode 100644 index 0000000..e92119b --- /dev/null +++ b/portable_hspec/Test/Hspec.hs @@ -0,0 +1,55 @@ +-- | Portable drop-in replacement of hspec that works both with GHC and MicroHs +module Test.Hspec +( Spec +, hspec +, describe +, it +, shouldBe +, shouldThrow +, shouldSatisfy +, context +) where + +import Data.IORef +import Control.Monad +import Control.Exception +import Control.Monad.IO.Class + +type Spec = IO () + +hspec :: Monad m => m a -> m a +hspec m = m + +context :: MonadIO m => String -> m a -> m a +context name m = do + liftIO $ putStrLn "............." + liftIO $ putStrLn name + m + +describe :: MonadIO m => String -> m a -> m a +describe name m = do + liftIO $ putStrLn "-------------" + liftIO $ putStrLn name + m + +it :: MonadIO m => String -> m a -> m a +it name m = do + liftIO $ putStrLn name + m + +shouldBe :: (MonadIO m, Show a, Eq a) => a -> a -> m () +shouldBe a b = unless (a == b) $ do + liftIO $ putStrLn $ "FAILED: expected " ++ show b ++ ", but got " ++ show a + +shouldSatisfy :: (MonadIO m, Show a) => a -> (a -> Bool) -> m () +shouldSatisfy a f = unless (f a) $ do + liftIO $ putStrLn $ "FAILED: result does't satisfy the condition: " ++ show a + +shouldThrow :: Exception e => IO a -> (e -> Bool) -> IO () +shouldThrow m f = do + me <- try m + case me of + Right _ -> putStrLn "FAILED: expected to throw" + Left e -> do + unless (f e) $ do + putStrLn $ "FAILED: exception doesn't satisfy the condition " ++ show e From c455ff898540bfe4ebe4db738a1675ed7146d22d Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 19:26:46 +0200 Subject: [PATCH 5/9] ship for unsafeChr --- compat/GHC/Base.hs | 14 ++++++++++++++ lib/Scanner.hs | 6 ------ scanner.cabal | 1 + spec/spec.hs | 2 -- 4 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 compat/GHC/Base.hs diff --git a/compat/GHC/Base.hs b/compat/GHC/Base.hs new file mode 100644 index 0000000..102f089 --- /dev/null +++ b/compat/GHC/Base.hs @@ -0,0 +1,14 @@ +{-# LANGUAGE PackageImports #-} +{-# LANGUAGE CPP #-} + +module GHC.Base +( unsafeChr +) where + +#ifdef __MHS__ +import Data.Char +unsafeChr :: Int -> Char +unsafeChr = chr +#else +import "base" GHC.Base +#endif diff --git a/lib/Scanner.hs b/lib/Scanner.hs index e72408e..841813b 100644 --- a/lib/Scanner.hs +++ b/lib/Scanner.hs @@ -50,13 +50,7 @@ import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as Lazy.ByteString #endif import Control.Monad - -#ifdef __MHS__ -unsafeChr :: Int -> Char -unsafeChr = Char.chr -#else import GHC.Base (unsafeChr) -#endif -- | Scan the complete input, without resupplying scanOnly :: Scanner a -> ByteString -> Either String a diff --git a/scanner.cabal b/scanner.cabal index 8d677c1..055acda 100644 --- a/scanner.cabal +++ b/scanner.cabal @@ -24,6 +24,7 @@ library other-modules: Prelude Data.Either Data.ByteString.Unsafe + GHC.Base Scanner.OctetPredicates build-depends: base <5 , fail diff --git a/spec/spec.hs b/spec/spec.hs index b151593..a0e1a89 100644 --- a/spec/spec.hs +++ b/spec/spec.hs @@ -11,9 +11,7 @@ import Scanner import Prelude hiding (take, takeWhile) import Data.Either import qualified Data.ByteString as ByteString -#ifndef __MHS__ import qualified Data.ByteString.Lazy as Lazy.ByteString -#endif import Test.Hspec main :: IO () From afa37d3871e861fc04c01e234bc5debf4c3876d8 Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 19:46:16 +0200 Subject: [PATCH 6/9] more stubs --- compat/Data/ByteString/Lazy_.hs | 22 ++++++++++++++++++++++ lib/Scanner.hs | 10 ++-------- scanner.cabal | 2 ++ spec/spec.hs | 12 +----------- 4 files changed, 27 insertions(+), 19 deletions(-) create mode 100644 compat/Data/ByteString/Lazy_.hs diff --git a/compat/Data/ByteString/Lazy_.hs b/compat/Data/ByteString/Lazy_.hs new file mode 100644 index 0000000..8654fa9 --- /dev/null +++ b/compat/Data/ByteString/Lazy_.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE PackageImports #-} +{-# LANGUAGE CPP #-} + +module Data.ByteString.Lazy_ +( ByteString +, toChunks +, fromChunks +) where + +#ifdef __MHS__ +import qualified Data.ByteString as Strict + +newtype ByteString = ByteString [Strict.ByteString] + +toChunks :: ByteString -> [Strict.ByteString] +toChunks (ByteString chunks) = chunks + +fromChunks :: [Strict.ByteString] -> ByteString +fromChunks = ByteString +#else +import "bytestring" Data.ByteString.Lazy +#endif diff --git a/lib/Scanner.hs b/lib/Scanner.hs index 841813b..3d53350 100644 --- a/lib/Scanner.hs +++ b/lib/Scanner.hs @@ -14,9 +14,7 @@ module Scanner , Result (..) , scan , scanOnly -#ifndef __MHS__ , scanLazy -#endif , scanWith , anyWord8 , anyChar8 @@ -45,10 +43,8 @@ import Data.Word import qualified Data.Char as Char import Data.ByteString (ByteString) import qualified Data.ByteString as ByteString -#ifndef __MHS__ -import qualified Data.ByteString.Lazy as Lazy (ByteString) -import qualified Data.ByteString.Lazy as Lazy.ByteString -#endif +import qualified Data.ByteString.Lazy_ as Lazy (ByteString) +import qualified Data.ByteString.Lazy_ as Lazy.ByteString import Control.Monad import GHC.Base (unsafeChr) @@ -61,7 +57,6 @@ scanOnly s bs = go (scan s bs) Fail _ err -> Left err More more -> go (more ByteString.empty) -#ifndef __MHS__ -- | Scan lazy bytestring by resupplying scanner with chunks scanLazy :: Scanner a -> Lazy.ByteString -> Either String a scanLazy s lbs = go (scan s) (Lazy.ByteString.toChunks lbs) @@ -74,7 +69,6 @@ scanLazy s lbs = go (scan s) (Lazy.ByteString.toChunks lbs) Done _ r -> Right r Fail _ err -> Left err More more' -> go more' chunks' -#endif -- | Scan with the provided resupply action scanWith :: Monad m => m ByteString -> Scanner a -> ByteString -> m (Result a) diff --git a/scanner.cabal b/scanner.cabal index 055acda..4e57c24 100644 --- a/scanner.cabal +++ b/scanner.cabal @@ -24,6 +24,7 @@ library other-modules: Prelude Data.Either Data.ByteString.Unsafe + Data.ByteString.Lazy_ GHC.Base Scanner.OctetPredicates build-depends: base <5 @@ -43,6 +44,7 @@ test-suite spec , scanner other-modules: Prelude Data.Either + Data.ByteString.Lazy_ default-language: Haskell2010 benchmark bench diff --git a/spec/spec.hs b/spec/spec.hs index a0e1a89..c3a68b9 100644 --- a/spec/spec.hs +++ b/spec/spec.hs @@ -11,7 +11,7 @@ import Scanner import Prelude hiding (take, takeWhile) import Data.Either import qualified Data.ByteString as ByteString -import qualified Data.ByteString.Lazy as Lazy.ByteString +import qualified Data.ByteString.Lazy_ as Lazy.ByteString import Test.Hspec main :: IO () @@ -29,7 +29,6 @@ anyWord8Spec = describe "anyWord8" $ do let bs = ByteString.pack [42, 43] scanOnly anyWord8 bs `shouldBe` Right 42 -#ifndef __MHS__ it "should consume the current byte" $ do let bs = ByteString.pack [42, 43] scanOnly (anyWord8 *> anyWord8) bs `shouldBe` Right 43 @@ -47,7 +46,6 @@ anyWord8Spec = describe "anyWord8" $ do , ByteString.pack [43] ] scanLazy (anyWord8 *> anyWord8) bs `shouldBe` Right 43 -#endif it "should fail on end of input" $ do let bs = ByteString.empty @@ -59,14 +57,12 @@ stringSpec = describe "string" $ do let bs = "hello world" scanOnly (string "hello" *> anyWord8) bs `shouldBe` Right 32 -#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ "hel" , "lo" ] scanLazy (string "hello") bs `shouldBe` Right () -#endif it "should fail on wrong input" $ do let bs = "helo world" @@ -78,7 +74,6 @@ takeSpec = describe "take" $ do let bs = "hello world" scanOnly (take 5) bs `shouldBe` Right "hello" -#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ "he" @@ -97,7 +92,6 @@ takeSpec = describe "take" $ do , "l" ] scanLazy (take 5) bs' `shouldSatisfy` isLeft -#endif takeWhileSpec :: Spec takeWhileSpec = describe "takeWhile" $ do @@ -105,7 +99,6 @@ takeWhileSpec = describe "takeWhile" $ do let bs = "hello world" scanOnly (takeWhile (/= 32)) bs `shouldBe` Right "hello" -#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ "he" @@ -113,7 +106,6 @@ takeWhileSpec = describe "takeWhile" $ do , "lo world" ] scanLazy (takeWhile (/= 32)) bs `shouldBe` Right "hello" -#endif it "should return everything is predicate where becomes False" $ do let bs = "hello" @@ -133,14 +125,12 @@ lookAheadSpec = describe "lookAhead" $ do let bs = ByteString.pack [42, 43] scanOnly (lookAhead *> anyWord8) bs `shouldBe` Right 42 -#ifndef __MHS__ it "should ask for more input" $ do let bs = Lazy.ByteString.fromChunks [ ByteString.pack [42] , ByteString.pack [43] ] scanLazy (anyWord8 *> lookAhead) bs `shouldBe` Right (Just 43) -#endif scanWithSpec :: Spec scanWithSpec = describe "scanWith" $ do From ed118a4bdbcf35591ef77ed3e2434fcd3f1d6d64 Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 20:25:35 +0200 Subject: [PATCH 7/9] readme --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 8c839d2..726e91c 100644 --- a/README.md +++ b/README.md @@ -27,3 +27,8 @@ Benchmark results: But if you really really really need backtracking, then you can just inject attoparsec parser into a scanner: http://hackage.haskell.org/package/scanner-attoparsec + +# MicroHs + +The library should compile and work with MicroHs. +To run the test suite: `mhs -icompat -iportable_hspec -ilib -r spec/spec.hs` From ebf1cec0aa07cb3c1c338a26f53a57234e403080 Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Sun, 7 Sep 2025 21:28:51 +0200 Subject: [PATCH 8/9] lefover --- spec/spec.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/spec/spec.hs b/spec/spec.hs index c3a68b9..fb2f628 100644 --- a/spec/spec.hs +++ b/spec/spec.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE CPP #-} module Main ( main From 3eab7ff946baddf455840e64aa465764ef956a69 Mon Sep 17 00:00:00 2001 From: Yuras Shumovich Date: Mon, 8 Sep 2025 20:58:13 +0200 Subject: [PATCH 9/9] now MicroHs has unsafeDrop --- compat/Data/ByteString/Unsafe.hs | 15 --------------- lib/Scanner.hs | 1 - scanner.cabal | 1 - 3 files changed, 17 deletions(-) delete mode 100644 compat/Data/ByteString/Unsafe.hs diff --git a/compat/Data/ByteString/Unsafe.hs b/compat/Data/ByteString/Unsafe.hs deleted file mode 100644 index ab02960..0000000 --- a/compat/Data/ByteString/Unsafe.hs +++ /dev/null @@ -1,15 +0,0 @@ -{-# LANGUAGE PackageImports #-} -{-# LANGUAGE CPP #-} - -module Data.ByteString.Unsafe -( unsafeDrop -) where - -#ifdef __MHS__ -import Data.ByteString (ByteString) -import Data.ByteString qualified as ByteString -unsafeDrop :: Int -> ByteString -> ByteString -unsafeDrop = ByteString.drop -#else -import "bytestring" Data.ByteString.Unsafe -#endif diff --git a/lib/Scanner.hs b/lib/Scanner.hs index 3d53350..f1e0012 100644 --- a/lib/Scanner.hs +++ b/lib/Scanner.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} -- | Fast not-backtracking incremental scanner for bytestrings -- diff --git a/scanner.cabal b/scanner.cabal index 4e57c24..1133cba 100644 --- a/scanner.cabal +++ b/scanner.cabal @@ -23,7 +23,6 @@ library Scanner.Internal other-modules: Prelude Data.Either - Data.ByteString.Unsafe Data.ByteString.Lazy_ GHC.Base Scanner.OctetPredicates