forked from josephwon0310/Launchpad
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
22 lines (17 loc) · 859 Bytes
/
parser.py
File metadata and controls
22 lines (17 loc) · 859 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import os
import re
import sys
import unidecode
# Default pattern used by parse_script to locate one speaker cue plus the
# text block beneath it.
#   group 1 - the cue: a line (optionally indented) made of upper-case
#             letters, digits, apostrophes, hyphens and spaces that ends in
#             a non-space character. It may be followed on the same line by
#             " (...)" and/or " [...]", which are matched but not captured.
#   group 2 - the text block on the line(s) right after the cue. Its first
#             line must begin with spaces/tabs in either branch below.
#   group 3 - the indentation of the first text line, captured only when the
#             text does not open with "(" or "[". If it is captured, every
#             continuation line must begin with that same indentation
#             (backreference \3); otherwise any run of lines that start with
#             spaces/tabs followed by non-blank content is taken.
# The pattern begins with "\n", so a cue on the very first line of the input
# (no preceding newline) cannot match.
default_regex = re.compile(r"\n[\t ]*([A-Z0-9'\- ]+[A-Z0-9'\-])(?: \(.+\))?(?: \[.+\])?\n(([\t ]+(?!\(|\[|\s))?(?(3).*(?:\n\3[\t ]*\S+.*)*|[\t ]+\S+.*(?:\n[\t ]+\S+.*)*))")
# Parenthesised or bracketed asides inside a text block; DOTALL lets a single
# aside span a line break.
_BRACKETED_ASIDE_RE = re.compile(r'\(.+?\)|\[.+?\]', re.DOTALL)
# Any run of whitespace (including newlines) to be collapsed to one space.
_WHITESPACE_RUN_RE = re.compile(r'\s+')


def parse_script(filename, directory='scripts', regex=default_regex):
    """Extract ``(name, line)`` pairs from a script file.

    The file is read as ISO-8859-1, transliterated to ASCII with
    ``unidecode`` and scanned with *regex*.

    Args:
        filename: Name of the file, relative to *directory*.
        directory: Folder that contains the file.
        regex: Pattern (compiled or string) whose group 1 is the speaker
            name and whose group 2 is the raw text block.

    Returns:
        A list of ``(name, line)`` tuples in file order. ``line`` is group 2
        with parenthesised/bracketed asides removed and all whitespace runs
        collapsed to single spaces; matches whose text ends up empty are
        skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    path = os.path.join(directory, filename)
    with open(path, encoding='ISO-8859-1') as content_file:
        raw_text = content_file.read()

    # Byte 0x92 is the curly apostrophe in Windows-1252; decoded as
    # ISO-8859-1 it shows up as the control character U+0092. Turn it into a
    # plain apostrophe before transliteration. (The previous literal
    # 'u\0092' spelled "u", NUL, "9", "2" and therefore never matched.)
    text = unidecode.unidecode(raw_text.replace('\u0092', "'"))

    lines = []
    for m in re.finditer(regex, text):
        name = m.group(1)
        without_asides = _BRACKETED_ASIDE_RE.sub('', m.group(2))
        line = _WHITESPACE_RUN_RE.sub(' ', without_asides).strip()
        if line:
            lines.append((name, line))
    return lines
def line_to_words(line):
    """Split *line* into its word tokens.

    A token is a run of word characters; several runs may be joined by an
    apostrophe, a right single quotation mark (U+2019) or a hyphen, and the
    token may carry one leading and/or one trailing apostrophe.

    Returns:
        The tokens as a list of strings, in the order they occur.
    """
    word_pattern = re.compile(r'\'?[\w]+(?:[\'\u2019-][\w]+)*\'?')
    # The pattern has no capturing groups, so each full match is one token.
    return [found.group(0) for found in word_pattern.finditer(line)]