lr0/parser.hs at master · CsengerG/lr0 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
import qualified Data.Set as Set
import Data.Ord as Ord
import Data.Maybe as Maybe
import Data.Char as Char
import Debug.Trace

---------------------------------------------------------------------------------------------------
------------------------------------------- TYPES -------------------------------------------------
---------------------------------------------------------------------------------------------------

-- The Tokens the lexer can return with
data Token = NumberToken Float
           | PlusToken
           | MinusToken
           | StarToken
           | ExclMarkToken
           | CosToken
           | OpParToken
           | ClParToken
             deriving (Show, Eq, Ord)

-- Make sure Terminal includes EndMarker and that NonTerminal includes Start and AugStart, where
data Terminal = EndMarker
              | Plus
              | Minus
              | Star
              | ExclMark
              | Cos
              | OpPar
              | ClPar
              | Number
                deriving (Show, Eq, Ord)

-- Start is the start symbol of your grammar. You MUST NOT use AugStart when describing your grammar.
data NonTerminal = AugStart | Start
                 | E
                 | Term
                 | CosTerm
                 | FactorialTerm
                 | Factor
                 | SignedNumber
                   deriving (Show, Eq, Ord)

-- GrammarSymbol is used in production rules --
data GrammarSymbol = T Terminal | NT NonTerminal deriving (Show, Eq, Ord)

-- ItemSymbol is used to describe items --
data ItemSymbol = IS GrammarSymbol | ItemDot deriving (Show, Eq, Ord)

-- The format of production rules and items is almost the same --
data ProductionRule = PR (NonTerminal, [GrammarSymbol]) deriving (Show, Eq, Ord)
data Item = I (NonTerminal, [ItemSymbol]) deriving (Show, Eq, Ord)

-- Just a tree
data Tree a = Br (a, [Tree a]) | Lf a deriving Show

-- Describe your grammar here by listing the production rules
prodRules = [
  PR(Start, [NT E]),

  PR(E, [NT E, T Plus, NT E]),
  PR(E, [NT E, T Minus, NT E]),
  PR(E, [T Plus, NT E]),
  PR(E, [T Minus, NT E]),
  PR(E, [NT Term]),

  PR(Term, [NT Term, T Star, NT CosTerm]),
  PR(Term, [NT CosTerm]),

  PR(CosTerm, [T Cos, NT CosTerm]),
  PR(CosTerm, [NT FactorialTerm]),

  PR(FactorialTerm, [NT FactorialTerm, T ExclMark]),
  PR(FactorialTerm, [NT Factor]),

  PR(Factor, [NT SignedNumber]),
  PR(Factor, [T OpPar, NT E, T ClPar]),

  PR(SignedNumber, [T Number]),
  PR(SignedNumber, [T Plus, T Number]),
  PR(SignedNumber, [T Minus, T Number])]


---------------------------------------------------------------------------------------------------
--------------------------------- UTILITY ---------------------------------------------------------
---------------------------------------------------------------------------------------------------

fact 0 = 1
fact n = n * fact (n-1)

closure :: (Ord a) => (a -> [a]) -> Set.Set a -> Set.Set a
-- Compute the closure of a Set.
closure arrows set =
  closureBFS (Set.fromList []) (Set.elems set)
    where
      closureBFS was [] = was
      closureBFS was (q:qs) = closureBFS newWas newQueue
        where
          newWas= Set.insert q was
          newQueue = qs ++ [x | x <- arrows q, Set.notMember x was]


itemArrows :: Item -> [Item]
-- Given an item, get the list of neighbouring items
itemArrows (I(_, symbols)) = (consWithDot . productionsWith . gsAfterDot) symbols
  where
    gsAfterDot :: [ItemSymbol] -> Maybe GrammarSymbol
    -- Get the GrammarSymbol after the Dot or nothing if not found.
    gsAfterDot [] = Nothing
    gsAfterDot (ItemDot:(IS gs):_) = Just gs
    gsAfterDot (x:xs) = gsAfterDot xs

    productionsWith :: Maybe GrammarSymbol -> [ProductionRule]
    -- Gives all the productions for a GrammarSymbol.
    productionsWith Nothing = []
    productionsWith (Just symbol) = filter (\(PR(nt, _)) -> symbol == NT nt) prodRules

    consWithDot :: [ProductionRule] -> [Item]
    -- Create a new Item from each production rule by prepending it with a dot.
    consWithDot = map (\(PR(nt, gss)) -> I(nt, ItemDot:(toIS gss)))
      where toIS = map (\gs -> IS gs)


-- Define the function itemClosure as a partially applied closure function.
itemClosure = closure itemArrows


goto :: Set.Set Item -> GrammarSymbol -> Set.Set Item
-- GOTO function: compute it by the definition
goto i x = (itemClosure . moveDotRightToX . getItemsWithDotLeftToX) i
  where
    moveDotRightToX :: Set.Set Item -> Set.Set Item
    moveDotRightToX = Set.map (\(I(nt, iSymbols)) -> I(nt, moveDotRight iSymbols))
      where
        moveDotRight :: [ItemSymbol] -> [ItemSymbol]
        moveDotRight [] = []
        moveDotRight (ItemDot : y : ys) = y : ItemDot : ys
        moveDotRight (y:ys) = y : moveDotRight ys

    getItemsWithDotLeftToX :: Set.Set Item -> Set.Set Item
    getItemsWithDotLeftToX = Set.filter (\(I(_, is)) -> isDotLeftToX is)
      where
        isDotLeftToX :: [ItemSymbol] -> Bool
        isDotLeftToX [] = False
        isDotLeftToX (ItemDot:y:ys) = IS x == y
        isDotLeftToX (y:ys) = isDotLeftToX ys


-- The start state is always one with the item AugStart -> . Start
startState = itemClosure (Set.fromList [I(AugStart, [ItemDot, IS (NT Start)])])
emptySet = Set.fromList []

--------------------------------
---------- MATCH ---------------
--------------------------------

data Match = M(String, String) deriving Show

matchEps :: Match -> Match
matchEps = \rest -> rest

matchOptPm :: Match -> Match
matchOptPm (M(m, ('+':rest))) = M('+':m, rest)
matchOptPm (M(m, ('-':rest))) = M('-':m, rest)
matchOptPm rest = rest

matchDigit :: Match -> Match
matchDigit (M(m, [])) = error "Unexpected EOF"
matchDigit (M(m, (ch:rest))) =
  if isDigit ch then M(ch:m, rest)
  else error $ "Expected digit (0-9), but found " ++ show ch

matchDigits :: Match -> Match
matchDigits match = (digits . matchDigit) match
    where digits (M(m,r)) = M((reverse . (takeWhile isDigit)) r ++ m, dropWhile isDigit r)

matchOptDigits :: Match -> Match
matchOptDigits (match @ (M(m, []))) = match
matchOptDigits (match @ (M(m, (ch:chs)))) =
  if isDigit ch then matchDigits match
  else match

matchOptExp :: Match -> Match
matchOptExp (M(m, ('e':chs))) = (matchDigits . matchOptPm) (M('e':m, chs))
matchOptExp (M(m, ('E':chs))) = (matchDigits . matchOptPm) (M('E':m, chs))
matchOptExp match = match

matchOptDotOptDigits :: Match -> Match
matchOptDotOptDigits (M(m, ('.':chs))) = matchOptDigits (M('.':m, chs))
matchOptDotOptDigits match = match

matchMiddle :: Match -> Match
matchMiddle (M(m, ('.':chs))) = matchDigits (M('.':m, chs))
matchMiddle match = (matchOptDotOptDigits . matchDigits) match

matchFp :: String -> Match
matchFp str = (matchOptExp . matchMiddle . matchOptPm) (M([], str))

---------------------------------------------------------------------------------------------------
------------------------------------- LEXER -------------------------------------------------------
---------------------------------------------------------------------------------------------------

lexString :: [Char] -> [Token]
lexString = (lexUtil . filter (not . isSpace))
  where
    lexUtil :: [Char] -> [Token]
    lexUtil [] = []
    lexUtil ('+':chs) = PlusToken : lexUtil chs
    lexUtil ('-':chs) = MinusToken : lexUtil chs
    lexUtil ('*':chs) = StarToken : lexUtil chs
    lexUtil ('!':chs) = ExclMarkToken : lexUtil chs
    lexUtil ('(':chs) = OpParToken : lexUtil chs
    lexUtil (')':chs) = ClParToken : lexUtil chs
    lexUtil ('c':'o':'s':chs) = CosToken : lexUtil chs
    lexUtil chs = NumberToken num : lexUtil rest
      where
        M(fp, rest) = matchFp chs
        num = read ('0':(reverse fp)) :: Float

---------------------------------------------------------------------------------------------------
------------------------------------- PARSER ------------------------------------------------------
---------------------------------------------------------------------------------------------------

parser :: [Terminal] -> Tree GrammarSymbol
-- The LR(0) parser's state can be defined by a stack and by the rest of the input symbols. For
-- convenience we also keep track of the symbols and the constructed parsetree.
parser input = parserUtil [startState] [T EndMarker] (map T input) []
  where
    parserUtil :: [Set.Set Item] -> [GrammarSymbol] -> [GrammarSymbol]
                                 -> [Tree GrammarSymbol] -> Tree GrammarSymbol
    parserUtil _ [NT AugStart, T EndMarker] [T EndMarker] trees =
      -- At this point the list of the trees should be of length 1, so return the head if this is
      -- the case. Otherwise the input is invalid and we should throw an exception.
      head trees
    parserUtil (stack @ (currState:_)) symbols (input @ (nextSymbol:restOfInput)) trees =
      -- Shift or reduce based on GOTO. If an empty set is returned, that means there is no shift
      -- so we have to reduce.
        if nextState /= emptySet then -- shift
          parserUtil (nextState:stack) (nextSymbol:symbols) restOfInput ((Lf nextSymbol) : trees)
        else parserUtil newStack newSymbols input newTree -- reduce
          where
            nextState = goto currState nextSymbol

            -- The production rule used for the reduction.
            candidates = filter (\(I(_, is)) -> last is == ItemDot) (Set.toList currState)
            (I (nt, beta)) = head candidates
            prodRuleLength = length beta - 1

            -- Determine the new state of the parser. Pop prodRuleLength states from the stack and
            -- use the new obtained symbol with GOTO to get the new state. Push the new state to
            -- the stack.
            droppedStack = drop prodRuleLength stack
            newState = goto (head droppedStack) (NT nt)

            newStack = newState : droppedStack
            newSymbols = (NT nt) : (drop prodRuleLength symbols)

            -- Create a new node in the tree. The new node has to have prodRuleLength children, so
            -- take as many parse subtrees from the tree stack.
            newTree = Br(NT nt, reverse $ take prodRuleLength trees) : (drop prodRuleLength trees)


-- DEBUG: example = itemClosure (Set.fromList [I(Start, [ItemDot, IS (NT E)])])
-- DEBUG: example2 = goto startState (T Id)
-- DEBUG: example3 = parser [Id, Star, Id, EndMarker]


eval strExpr = fst $ evalUtil parsedTree values
  where
    lexedStr = lexString strExpr
    values = [x | NumberToken x <- lexedStr]

    parsedTree = parser $ (map toTerminal lexedStr) ++ [EndMarker]
      where
        toTerminal (NumberToken _) = Number
        toTerminal PlusToken = Plus
        toTerminal MinusToken = Minus
        toTerminal StarToken = Star
        toTerminal ExclMarkToken = ExclMark
        toTerminal CosToken = Cos
        toTerminal OpParToken = OpPar
        toTerminal ClParToken = ClPar

    -- Number --
    evalUtil (Lf _) (v:vs) =
      (v,vs)

    -- AugStart --
    evalUtil (Br (NT AugStart, [augStartTree])) vs =
      evalUtil augStartTree vs

    -- Start --
    evalUtil (Br (NT Start, [startTree])) vs =
      evalUtil startTree vs

    -- E --
    evalUtil (Br (NT E, [Lf (T Plus), exprTree])) vs =
      evalUtil exprTree vs
    evalUtil (Br (NT E, [Lf (T Minus), exprTree])) vs =
      (-1.0*ret, vState) where
        (ret, vState) = evalUtil exprTree vs
    evalUtil (Br (NT E, [e1, Lf (T Plus), e2])) vs =
      (ret1+ret2, v2) where
        (ret1, v1) = evalUtil e1 vs
        (ret2, v2) = evalUtil e2 v1
    evalUtil (Br (NT E, [e1, Lf (T Minus), e2])) vs =
      (ret1-ret2, v2) where
        (ret1, v1) = evalUtil e1 vs
        (ret2, v2) = evalUtil e2 v1
    evalUtil (Br (NT E, [termTree])) vs =
      evalUtil termTree vs

    -- term --
    evalUtil (Br (NT Term, [t, _, ct])) vs =
      (ret1*ret2, v2) where
        (ret1, v1) = evalUtil t vs
        (ret2, v2) = evalUtil ct v1
    evalUtil (Br (NT Term, [cosTermTree])) vs =
      evalUtil cosTermTree vs

    -- costerm --
    evalUtil (Br (NT CosTerm, [_, cosTermTree])) vs =
      (cos ret, vState) where
        (ret, vState) = evalUtil cosTermTree vs
    evalUtil (Br (NT CosTerm, [factorialTermTree])) vs =
      evalUtil factorialTermTree vs

    -- factorialterm --
    evalUtil (Br (NT FactorialTerm, [factorialTermTree, _])) vs =
      (fromInteger $ fact (floor ret), vState) where
        (ret, vState) = evalUtil factorialTermTree vs
    evalUtil (Br (NT FactorialTerm, [factorTree])) vs =
      evalUtil factorTree vs

    -- factor --
    evalUtil (Br (NT Factor, [signedNumberTree])) vs =
      evalUtil signedNumberTree vs
    evalUtil (Br (NT Factor, [_ , exprTree, _])) vs =
      evalUtil exprTree vs

    -- signednumber --
    evalUtil (Br (NT SignedNumber, [numberTree])) vs =
      evalUtil numberTree vs
    evalUtil (Br (NT SignedNumber, [Lf (T Plus), numberTree])) vs =
      evalUtil numberTree vs
    evalUtil (Br (NT SignedNumber, [Lf (T Minus), numberTree])) vs =
      (-ret, v1) where
        (ret, v1) = evalUtil numberTree vs


tests = [
  (eval "1", 1.0),
  (eval ".1", 0.1),
  (eval "1+1", 2.0),
  (eval ".2e+2+-1.0e2*100.02", -9982.0),
  (eval "cos 12!", -0.76619765),
  (eval "(123)", 123.0),
  (eval "1000e-3", 1.0),
  (eval "-1000e-3", -1.0),
  (eval "1*2+3*4+5*6*7", 224.0),
  (eval "cos cos cos 3!!", 0.784951602),
  (eval "1+1-1+1-1+1-1", 1.0),
  (eval "1+++        ++++  +++1", 2.0),
  (eval "- - - -1 +-+-+ 1", 2.0),
  (eval "cos -2", -0.41614684)]