diff --git a/baron/grouper.py b/baron/grouper.py index 70ef033e..39289a5d 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -1,6 +1,6 @@ # encoding: utf-8 -import re +import regex as re from .utils import FlexibleIterator to_group = ( diff --git a/baron/render.py b/baron/render.py index 7addb110..6234a2cf 100644 --- a/baron/render.py +++ b/baron/render.py @@ -1,6 +1,8 @@ import sys import json +import six + def render(node, strict=False): """Recipe to render a given FST node. @@ -69,7 +71,7 @@ def render_node(node, strict=False): if key_type == "key": assert isinstance(node[render_key], (dict, type(None))), "Key '%s' is expected to have type of 'key' (dict/None) but has type of '%s' instead" % (render_key, type(node[render_key])) elif key_type == "string": - assert isinstance(node[render_key], str), "Key '%s' is expected to have type of 'string' but has type of '%s' instead" % (render_key, type(node[render_key])) + assert isinstance(node[render_key], six.string_types), "Key '%s' is expected to have type of 'string' but has type of '%s' instead" % (render_key, type(node[render_key])) elif key_type in ("list", "formatting"): assert isinstance(node[render_key], list), "Key '%s' is expected to have type of 'list' but has type of '%s' instead" % (render_key, type(node[render_key])) elif key_type == "constant": diff --git a/baron/spliter.py b/baron/spliter.py index e833d40e..5f3dcc48 100644 --- a/baron/spliter.py +++ b/baron/spliter.py @@ -1,5 +1,5 @@ import string -from .utils import FlexibleIterator, BaronError +from .utils import FlexibleIterator, BaronError, is_xid_start, is_xid_continue def split(sequence): @@ -58,12 +58,16 @@ def split_generator(sequence): not_found = False yield iterator.grab(lambda iterator: iterator.show_next() in section) + if iterator.next_is(is_xid_start) or iterator.next_is(is_xid_continue): + not_found = False + yield iterator.grab(lambda iterator: iterator.next_is(is_xid_start) or iterator.next_is(is_xid_continue)) + for one in "@,.;()=*:+-/^%&<>|\r\n~[]{}!``\\": if iterator.next_in(one): not_found = False yield next(iterator) - if iterator.show_next().__repr__().startswith("'\\x"): + if iterator.show_next().__repr__().startswith(r"'\x"): # guys, seriously, how do you manage to put this shit in your code? # I mean, I don't even know how this is possible! # example of guilty file: ve/lib/python2.7/site-packages/tests/test_oauth.py diff --git a/baron/tokenizer.py b/baron/tokenizer.py index f4bb60b7..9c770e90 100644 --- a/baron/tokenizer.py +++ b/baron/tokenizer.py @@ -1,4 +1,4 @@ -import re +import regex as re from .utils import BaronError @@ -9,7 +9,7 @@ class UnknowItem(BaronError): KEYWORDS = ("and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "exec", "finally", "for", "from", "global", "nonlocal", "if", "import", "in", "is", "lambda", "not", "or", "pass", "print", "raise", "return", "try", "while", "with", "yield") TOKENS = ( - (r'[a-zA-Z_]\w*', 'NAME'), + (r'[\p{XID_Start}_]\p{XID_Continue}*', 'NAME'), (r'0', 'INT'), (r'[-+]?\d+[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), (r'[-+]?\d+.\d?[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), diff --git a/baron/utils.py b/baron/utils.py index f90b630b..ed3df74e 100644 --- a/baron/utils.py +++ b/baron/utils.py @@ -1,10 +1,11 @@ import sys -import re +import regex as re +import six python_version = sys.version_info[0] python_subversion = sys.version_info[1] -string_instance = str if python_version == 3 else basestring +string_instance = six.string_types # alias, for isinstance usage in redbaron class BaronError(Exception): @@ -33,9 +34,12 @@ def next_starts_with(self, sentence): return self.sequence[self.position + 1: self.position + 1 + size_of_choice] == sentence def next_in(self, choice): + return self.next_is(lambda item: item in choice) + + def next_is(self, predicate): if self.position + 1 >= len(self.sequence): return False - return self.sequence[self.position + 1] in choice + return predicate(self.sequence[self.position + 1]) def show_next(self, at=1): if self.position + at >= len(self.sequence): @@ -106,6 +110,20 @@ def split_on_newlines(text): yield text[current_position:] +xid_start_regex = re.compile(r"\p{XID_Start}") + + +def is_xid_start(char): + return xid_start_regex.match(char) + + +xid_continue_regex = re.compile(r"\p{XID_Continue}") + + +def is_xid_continue(char): + return xid_continue_regex.match(char) + + # Thanks to # https://github.com/nvie/rq/commit/282f4be9316d608ebbacd6114aab1203591e8f95 if python_version >= 3 or python_subversion >= 7: @@ -115,26 +133,25 @@ def total_ordering(cls): """Class decorator that fills in missing ordering methods""" convert = { '__lt__': [('__gt__', lambda self, other: other < self), - ('__le__', lambda self, other: not other < self), - ('__ge__', lambda self, other: not self < other)], + ('__le__', lambda self, other: not other < self), + ('__ge__', lambda self, other: not self < other)], '__le__': [('__ge__', lambda self, other: other <= self), - ('__lt__', lambda self, other: not other <= self), - ('__gt__', lambda self, other: not self <= other)], + ('__lt__', lambda self, other: not other <= self), + ('__gt__', lambda self, other: not self <= other)], '__gt__': [('__lt__', lambda self, other: other > self), - ('__ge__', lambda self, other: not other > self), - ('__le__', lambda self, other: not self > other)], + ('__ge__', lambda self, other: not other > self), + ('__le__', lambda self, other: not self > other)], '__ge__': [('__le__', lambda self, other: other >= self), - ('__gt__', lambda self, other: not other >= self), - ('__lt__', lambda self, other: not self >= other)] + ('__gt__', lambda self, other: not other >= self), + ('__lt__', lambda self, other: not self >= other)] } roots = set(dir(cls)) & set(convert) if not roots: raise ValueError('must define at least one ordering operation: < > <= >=') # noqa - root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ + root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ for opname, opfunc in convert[root]: if opname not in roots: opfunc.__name__ = opname opfunc.__doc__ = getattr(int, opname).__doc__ setattr(cls, opname, opfunc) return cls - diff --git a/requirements.txt b/requirements.txt index 31ad5809..92268f84 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ rply +regex +six \ No newline at end of file diff --git a/setup.py b/setup.py index a9740c02..5c96d51d 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ long_description=read_md("README.md") + "\n\n" + open("CHANGELOG", "r").read(), author_email='cortex@worlddomination.be', url='https://github.com/PyCQA/baron', - install_requires=['rply'], + install_requires=['rply', 'regex', 'six'], packages=['baron'], license='lgplv3+', scripts=[], diff --git a/tests/test_path.py b/tests/test_path.py index 99063978..2e80d06d 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -1,8 +1,9 @@ +import six + from baron.baron import parse from baron.path import PathWalker, Position, BoundingBox -from baron.path import position_to_path, path_to_node, position_to_node from baron.path import path_to_bounding_box, node_to_bounding_box -from baron.utils import string_instance +from baron.path import position_to_path, path_to_node, position_to_node def test_position(): @@ -145,7 +146,7 @@ def check_path(code, positions, target_path): return node = path_to_node(tree, path) - assert isinstance(node, string_instance) + assert isinstance(node, six.string_types) assert position_to_node(tree, position) is node diff --git a/tests/test_spliter.py b/tests/test_spliter.py index 34402aa0..2f9bde6f 100644 --- a/tests/test_spliter.py +++ b/tests/test_spliter.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding:Utf-8 -*- - +import six from baron.spliter import split, UntreatedError from baron.utils import python_version @@ -91,6 +91,11 @@ def test_assign(): assert split("a = b") == ["a", " ", "=", " ", "b"] +if six.PY3: + def test_assign_unicode(): + assert split("α = β") == ["α", " ", "=", " ", "β"] + + def test_call(): assert split("function()") == ["function", "(", ")"] @@ -247,7 +252,9 @@ def test_if(): def test_if_elif_else(): - assert split("if a:\n pass\nelif b:\n pass\nelse: \n pass") == ["if", " ", "a", ":", "\n", " ", "pass", "\n", "elif", " ", "b", ":", "\n", " ", "pass", "\n", "else", ":", " ", "\n", " ", "pass"] + assert split("if a:\n pass\nelif b:\n pass\nelse: \n pass") == ["if", " ", "a", ":", "\n", " ", "pass", "\n", + "elif", " ", "b", ":", "\n", " ", "pass", "\n", + "else", ":", " ", "\n", " ", "pass"] def test_while(): @@ -365,10 +372,12 @@ def test_backslash_in_comment(): def test_regression(): - assert split("(r'[\"\\'](.|\n|\r)*[\"\\']', 'STRING'),") == ["(", "r", "'[\"\\'](.|\n|\r)*[\"\\']'", ",", " ", "'STRING'", ")", ","] + assert split("(r'[\"\\'](.|\n|\r)*[\"\\']', 'STRING'),") == ["(", "r", "'[\"\\'](.|\n|\r)*[\"\\']'", ",", " ", + "'STRING'", ")", ","] + # TODO: make this test pass in python3 also # requires to remove dependency on ast.py -if python_version == 2: +if six.PY2: def test_remove_crap(): assert split("\x0c\xef\xbb\xbf") == [] diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index ae865420..da5ad037 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding:Utf-8 -*- - +import six from baron.tokenizer import tokenize, KEYWORDS @@ -22,6 +22,12 @@ def test_name__(): match('_a', 'NAME') +if six.PY3: + def test_name_unicode(): + match('β', 'NAME') + match('가사', 'NAME') + + def test_name_number(): match('a123', 'NAME') @@ -551,6 +557,7 @@ def test_exponant_complex(): match("-1.1E+1J", "FLOAT_EXPONANT_COMPLEX") match("-.1E+1J", "FLOAT_EXPONANT_COMPLEX") + # TODO 1.1e1j