Skip to content

Commit 4b3c658

Browse files
committed
fix(tokenizer): update string parsing regex to handle \\"
1 parent b429148 commit 4b3c658

1 file changed

Lines changed: 9 additions & 4 deletions

File tree

arkdoc/parser/tokenizer.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22

33
import re
4-
from typing import List
4+
from typing import List, Any, Generator
55
from dataclasses import dataclass
66

77

@@ -21,7 +21,7 @@ def __str__(self):
2121
Keywords = "let mut set del fun if while import begin".split()
2222
TokenSpecification = [
2323
("NUMBER", r"\d+(\.\d*)?"),
24-
("STRING", r"\"[^\"]*\""),
24+
("STRING", r"\"(\\\\|\\\"|[^\"])*\""),
2525
("ID", r"[\w:?=!@&<>+\-%*/.$]+"),
2626
("LPAREN", r"[(\[{]"),
2727
("RPAREN", r"[)\]}]"),
@@ -32,7 +32,7 @@ def __str__(self):
3232
]
3333

3434

35-
def tokenize(code: str) -> List[Token]:
35+
def tokenize(code: str) -> Generator[Token, Any, None]:
3636
tok_regex = "|".join("(?P<%s>%s)" % pair for pair in TokenSpecification)
3737
line_num = 1
3838
line_start = 0
@@ -98,13 +98,18 @@ def tree_from_tokens(tokens: List[Token]) -> List:
9898
L = []
9999
while token.type == "COMMENT":
100100
L.append(token)
101+
if not tokens:
102+
raise RuntimeError(f"Expected more after {token}")
101103
token = tokens.pop(0)
102104

103105
if token.type == "LPAREN":
104106
L2 = []
107+
last_token = tokens[0]
105108
while tokens[0].type != "RPAREN":
109+
if not tokens:
110+
raise SyntaxError(f"No more tokens after {last_token}")
106111
L2.append(tree_from_tokens(tokens))
107-
tokens.pop(0)
112+
last_token = tokens.pop(0)
108113
L.append(L2)
109114
return L
110115
elif token.type == "RPAREN":

0 commit comments

Comments
 (0)