From 88864d7afa639d69cc9d2a60d6d7598d7187f74d Mon Sep 17 00:00:00 2001
From: Dylan Lukes <lukes.dylan@gmail.com>
Date: Thu, 23 Jan 2020 17:15:38 -0800
Subject: [PATCH 1/6] replace re => regex

---
 baron/grouper.py   | 2 +-
 baron/tokenizer.py | 2 +-
 baron/utils.py     | 2 +-
 requirements.txt   | 1 +
 setup.py           | 2 +-
 5 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/baron/grouper.py b/baron/grouper.py
index 70ef033e..39289a5d 100644
--- a/baron/grouper.py
+++ b/baron/grouper.py
@@ -1,6 +1,6 @@
 # encoding: utf-8
 
-import re
+import regex as re
 from .utils import FlexibleIterator
 
 to_group = (
diff --git a/baron/tokenizer.py b/baron/tokenizer.py
index f4bb60b7..1eacd7bf 100644
--- a/baron/tokenizer.py
+++ b/baron/tokenizer.py
@@ -1,4 +1,4 @@
-import re
+import regex as re
 from .utils import BaronError
 
 
diff --git a/baron/utils.py b/baron/utils.py
index f90b630b..bd4512ed 100644
--- a/baron/utils.py
+++ b/baron/utils.py
@@ -1,5 +1,5 @@
 import sys
-import re
+import regex as re
 
 
 python_version = sys.version_info[0]
diff --git a/requirements.txt b/requirements.txt
index 31ad5809..e84fabe6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 rply
+regex
\ No newline at end of file
diff --git a/setup.py b/setup.py
index a9740c02..fa2ad351 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
       long_description=read_md("README.md") + "\n\n" + open("CHANGELOG", "r").read(),
       author_email='cortex@worlddomination.be',
       url='https://github.com/PyCQA/baron',
-      install_requires=['rply'],
+      install_requires=['rply', 'regex'],
       packages=['baron'],
       license='lgplv3+',
       scripts=[],

From 99514e97fd71c0a1eed62d0906f24b7016282f39 Mon Sep 17 00:00:00 2001
From: Dylan Lukes <lukes.dylan@gmail.com>
Date: Thu, 23 Jan 2020 17:19:54 -0800
Subject: [PATCH 2/6] NAME regex "[a-zA-Z_]\w*" =>
 "[\p{XID_Start}_]\p{XID_Continue}*"

---
 baron/tokenizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/baron/tokenizer.py b/baron/tokenizer.py
index 1eacd7bf..9c770e90 100644
--- a/baron/tokenizer.py
+++ b/baron/tokenizer.py
@@ -9,7 +9,7 @@ class UnknowItem(BaronError):
 KEYWORDS = ("and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "exec", "finally", "for", "from", "global", "nonlocal", "if", "import", "in", "is", "lambda", "not", "or", "pass", "print", "raise", "return", "try", "while", "with", "yield")
 
 TOKENS = (
-    (r'[a-zA-Z_]\w*', 'NAME'),
+    (r'[\p{XID_Start}_]\p{XID_Continue}*', 'NAME'),
     (r'0', 'INT'),
     (r'[-+]?\d+[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'),
     (r'[-+]?\d+.\d?[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'),

From 9517d7e34ed6b78f2ae58f187945a0a5769a62cd Mon Sep 17 00:00:00 2001
From: Dylan Lukes <lukes.dylan@gmail.com>
Date: Thu, 23 Jan 2020 17:21:05 -0800
Subject: [PATCH 3/6] add test for unicode name

---
 tests/test_tokenizer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
index ae865420..01d03809 100644
--- a/tests/test_tokenizer.py
+++ b/tests/test_tokenizer.py
@@ -22,6 +22,11 @@ def test_name__():
     match('_a', 'NAME')
 
 
+def test_name_unicode():
+    match('β', 'NAME')
+    match('가사', 'NAME')
+
+
 def test_name_number():
     match('a123', 'NAME')
 
@@ -551,6 +556,7 @@ def test_exponant_complex():
     match("-1.1E+1J", "FLOAT_EXPONANT_COMPLEX")
     match("-.1E+1J", "FLOAT_EXPONANT_COMPLEX")
 
+
 # TODO 1.1e1j
 
 

From 19d61efd0ec208b5155c96b1aa02f47937e0d2ba Mon Sep 17 00:00:00 2001
From: Dylan Lukes <lukes.dylan@gmail.com>
Date: Thu, 23 Jan 2020 22:12:38 -0800
Subject: [PATCH 4/6] support splitting non-ASCII identifiers

---
 baron/spliter.py      |  6 +++++-
 baron/utils.py        | 41 +++++++++++++++++++++++++++++------------
 tests/test_spliter.py | 12 ++++++++++--
 3 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/baron/spliter.py b/baron/spliter.py
index e833d40e..956d8f19 100644
--- a/baron/spliter.py
+++ b/baron/spliter.py
@@ -1,5 +1,5 @@
 import string
-from .utils import FlexibleIterator, BaronError
+from .utils import FlexibleIterator, BaronError, is_xid_start, is_xid_continue
 
 
 def split(sequence):
@@ -58,6 +58,10 @@ def split_generator(sequence):
                 not_found = False
                 yield iterator.grab(lambda iterator: iterator.show_next() in section)
 
+        if iterator.next_is(is_xid_start) or iterator.next_is(is_xid_continue):
+            not_found = False
+            yield iterator.grab(lambda iterator: iterator.next_is(is_xid_start) or iterator.next_is(is_xid_continue))
+
         for one in "@,.;()=*:+-/^%&<>|\r\n~[]{}!``\\":
             if iterator.next_in(one):
                 not_found = False
diff --git a/baron/utils.py b/baron/utils.py
index bd4512ed..cc7d709d 100644
--- a/baron/utils.py
+++ b/baron/utils.py
@@ -1,6 +1,7 @@
 import sys
-import regex as re
+import unicodedata
 
+import regex as re
 
 python_version = sys.version_info[0]
 python_subversion = sys.version_info[1]
@@ -33,9 +34,12 @@ def next_starts_with(self, sentence):
         return self.sequence[self.position + 1: self.position + 1 + size_of_choice] == sentence
 
     def next_in(self, choice):
+        return self.next_is(lambda item: item in choice)
+
+    def next_is(self, predicate):
         if self.position + 1 >= len(self.sequence):
             return False
-        return self.sequence[self.position + 1] in choice
+        return predicate(self.sequence[self.position + 1])
 
     def show_next(self, at=1):
         if self.position + at >= len(self.sequence):
@@ -106,6 +110,20 @@ def split_on_newlines(text):
         yield text[current_position:]
 
 
+xid_start_regex = re.compile(r"\p{XID_Start}")
+
+
+def is_xid_start(char):
+    return xid_start_regex.match(char)
+
+
+xid_continue_regex = re.compile(r"\p{XID_Continue}")
+
+
+def is_xid_continue(char):
+    return xid_continue_regex.match(char)
+
+
 # Thanks to
 # https://github.com/nvie/rq/commit/282f4be9316d608ebbacd6114aab1203591e8f95
 if python_version >= 3 or python_subversion >= 7:
@@ -115,26 +133,25 @@ def total_ordering(cls):
         """Class decorator that fills in missing ordering methods"""
         convert = {
             '__lt__': [('__gt__', lambda self, other: other < self),
-                    ('__le__', lambda self, other: not other < self),
-                    ('__ge__', lambda self, other: not self < other)],
+                       ('__le__', lambda self, other: not other < self),
+                       ('__ge__', lambda self, other: not self < other)],
             '__le__': [('__ge__', lambda self, other: other <= self),
-                    ('__lt__', lambda self, other: not other <= self),
-                    ('__gt__', lambda self, other: not self <= other)],
+                       ('__lt__', lambda self, other: not other <= self),
+                       ('__gt__', lambda self, other: not self <= other)],
             '__gt__': [('__lt__', lambda self, other: other > self),
-                    ('__ge__', lambda self, other: not other > self),
-                    ('__le__', lambda self, other: not self > other)],
+                       ('__ge__', lambda self, other: not other > self),
+                       ('__le__', lambda self, other: not self > other)],
             '__ge__': [('__le__', lambda self, other: other >= self),
-                    ('__gt__', lambda self, other: not other >= self),
-                    ('__lt__', lambda self, other: not self >= other)]
+                       ('__gt__', lambda self, other: not other >= self),
+                       ('__lt__', lambda self, other: not self >= other)]
         }
         roots = set(dir(cls)) & set(convert)
         if not roots:
             raise ValueError('must define at least one ordering operation: < > <= >=')  # noqa
-        root = max(roots)       # prefer __lt__ to __le__ to __gt__ to __ge__
+        root = max(roots)  # prefer __lt__ to __le__ to __gt__ to __ge__
         for opname, opfunc in convert[root]:
             if opname not in roots:
                 opfunc.__name__ = opname
                 opfunc.__doc__ = getattr(int, opname).__doc__
                 setattr(cls, opname, opfunc)
         return cls
-
diff --git a/tests/test_spliter.py b/tests/test_spliter.py
index 34402aa0..87fac56b 100644
--- a/tests/test_spliter.py
+++ b/tests/test_spliter.py
@@ -91,6 +91,10 @@ def test_assign():
     assert split("a = b") == ["a", " ", "=", " ", "b"]
 
 
+def test_assign_unicode():
+    assert split("α = β") == ["α", " ", "=", " ", "β"]
+
+
 def test_call():
     assert split("function()") == ["function", "(", ")"]
 
@@ -247,7 +251,9 @@ def test_if():
 
 
 def test_if_elif_else():
-    assert split("if a:\n pass\nelif b:\n pass\nelse: \n pass") == ["if", " ", "a", ":", "\n", " ", "pass", "\n", "elif", " ", "b", ":", "\n", " ", "pass", "\n", "else", ":", " ", "\n", " ", "pass"]
+    assert split("if a:\n pass\nelif b:\n pass\nelse: \n pass") == ["if", " ", "a", ":", "\n", " ", "pass", "\n",
+                                                                    "elif", " ", "b", ":", "\n", " ", "pass", "\n",
+                                                                    "else", ":", " ", "\n", " ", "pass"]
 
 
 def test_while():
@@ -365,7 +371,9 @@ def test_backslash_in_comment():
 
 
 def test_regression():
-    assert split("(r'[\"\\'](.|\n|\r)*[\"\\']', 'STRING'),") == ["(", "r", "'[\"\\'](.|\n|\r)*[\"\\']'", ",", " ", "'STRING'", ")", ","]
+    assert split("(r'[\"\\'](.|\n|\r)*[\"\\']', 'STRING'),") == ["(", "r", "'[\"\\'](.|\n|\r)*[\"\\']'", ",", " ",
+                                                                 "'STRING'", ")", ","]
+
 
 # TODO: make this test pass in python3 also
 # requires to remove dependency on ast.py

From e6628bcdc58e9c80bd65fb01ddc5f0baa76d1307 Mon Sep 17 00:00:00 2001
From: Dylan Lukes <lukes.dylan@gmail.com>
Date: Fri, 24 Jan 2020 16:03:17 -0800
Subject: [PATCH 5/6] minor cleanup: raw string for \x check, drop unused unicodedata import

---
 baron/spliter.py | 2 +-
 baron/utils.py   | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/baron/spliter.py b/baron/spliter.py
index 956d8f19..5f3dcc48 100644
--- a/baron/spliter.py
+++ b/baron/spliter.py
@@ -67,7 +67,7 @@ def split_generator(sequence):
                 not_found = False
                 yield next(iterator)
 
-        if iterator.show_next().__repr__().startswith("'\\x"):
+        if iterator.show_next().__repr__().startswith(r"'\x"):
             # guys, seriously, how do you manage to put this shit in your code?
             # I mean, I don't even know how this is possible!
             # example of guilty file: ve/lib/python2.7/site-packages/tests/test_oauth.py
diff --git a/baron/utils.py b/baron/utils.py
index cc7d709d..c4ce7c49 100644
--- a/baron/utils.py
+++ b/baron/utils.py
@@ -1,5 +1,4 @@
 import sys
-import unicodedata
 
 import regex as re
 

From 4c202094412938e21dab6c9d108ea8edaac049a2 Mon Sep 17 00:00:00 2001
From: Dylan Lukes <lukes.dylan@gmail.com>
Date: Fri, 24 Jan 2020 16:28:41 -0800
Subject: [PATCH 6/6] add six, make unicode tests conditional on py version

---
 baron/render.py         | 4 +++-
 baron/utils.py          | 3 ++-
 requirements.txt        | 3 ++-
 setup.py                | 2 +-
 tests/test_path.py      | 7 ++++---
 tests/test_spliter.py   | 9 +++++----
 tests/test_tokenizer.py | 9 +++++----
 7 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/baron/render.py b/baron/render.py
index 7addb110..6234a2cf 100644
--- a/baron/render.py
+++ b/baron/render.py
@@ -1,6 +1,8 @@
 import sys
 import json
 
+import six
+
 
 def render(node, strict=False):
     """Recipe to render a given FST node.
@@ -69,7 +71,7 @@ def render_node(node, strict=False):
                 if key_type == "key":
                     assert isinstance(node[render_key], (dict, type(None))), "Key '%s' is expected to have type of 'key' (dict/None) but has type of '%s' instead" % (render_key, type(node[render_key]))
                 elif key_type == "string":
-                    assert isinstance(node[render_key], str), "Key '%s' is expected to have type of 'string' but has type of '%s' instead" % (render_key, type(node[render_key]))
+                    assert isinstance(node[render_key], six.string_types), "Key '%s' is expected to have type of 'string' but has type of '%s' instead" % (render_key, type(node[render_key]))
                 elif key_type in ("list", "formatting"):
                     assert isinstance(node[render_key], list), "Key '%s' is expected to have type of 'list' but has type of '%s' instead" % (render_key, type(node[render_key]))
                 elif key_type == "constant":
diff --git a/baron/utils.py b/baron/utils.py
index c4ce7c49..ed3df74e 100644
--- a/baron/utils.py
+++ b/baron/utils.py
@@ -1,10 +1,11 @@
 import sys
 
 import regex as re
+import six
 
 python_version = sys.version_info[0]
 python_subversion = sys.version_info[1]
-string_instance = str if python_version == 3 else basestring
+string_instance = six.string_types  # alias, for isinstance usage in redbaron
 
 
 class BaronError(Exception):
diff --git a/requirements.txt b/requirements.txt
index e84fabe6..92268f84 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 rply
-regex
\ No newline at end of file
+regex
+six
\ No newline at end of file
diff --git a/setup.py b/setup.py
index fa2ad351..5c96d51d 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
       long_description=read_md("README.md") + "\n\n" + open("CHANGELOG", "r").read(),
       author_email='cortex@worlddomination.be',
       url='https://github.com/PyCQA/baron',
-      install_requires=['rply', 'regex'],
+      install_requires=['rply', 'regex', 'six'],
       packages=['baron'],
       license='lgplv3+',
       scripts=[],
diff --git a/tests/test_path.py b/tests/test_path.py
index 99063978..2e80d06d 100644
--- a/tests/test_path.py
+++ b/tests/test_path.py
@@ -1,8 +1,9 @@
+import six
+
 from baron.baron import parse
 from baron.path import PathWalker, Position, BoundingBox
-from baron.path import position_to_path, path_to_node, position_to_node
 from baron.path import path_to_bounding_box, node_to_bounding_box
-from baron.utils import string_instance
+from baron.path import position_to_path, path_to_node, position_to_node
 
 
 def test_position():
@@ -145,7 +146,7 @@ def check_path(code, positions, target_path):
             return
 
         node = path_to_node(tree, path)
-        assert isinstance(node, string_instance)
+        assert isinstance(node, six.string_types)
 
         assert position_to_node(tree, position) is node
 
diff --git a/tests/test_spliter.py b/tests/test_spliter.py
index 87fac56b..2f9bde6f 100644
--- a/tests/test_spliter.py
+++ b/tests/test_spliter.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding:Utf-8 -*-
-
+import six
 
 from baron.spliter import split, UntreatedError
 from baron.utils import python_version
@@ -91,8 +91,9 @@ def test_assign():
     assert split("a = b") == ["a", " ", "=", " ", "b"]
 
 
-def test_assign_unicode():
-    assert split("α = β") == ["α", " ", "=", " ", "β"]
+if six.PY3:
+    def test_assign_unicode():
+        assert split("α = β") == ["α", " ", "=", " ", "β"]
 
 
 def test_call():
@@ -377,6 +378,6 @@ def test_regression():
 
 # TODO: make this test pass in python3 also
 # requires to remove dependency on ast.py
-if python_version == 2:
+if six.PY2:
     def test_remove_crap():
         assert split("\x0c\xef\xbb\xbf") == []
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
index 01d03809..da5ad037 100644
--- a/tests/test_tokenizer.py
+++ b/tests/test_tokenizer.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding:Utf-8 -*-
-
+import six
 
 from baron.tokenizer import tokenize, KEYWORDS
 
@@ -22,9 +22,10 @@ def test_name__():
     match('_a', 'NAME')
 
 
-def test_name_unicode():
-    match('β', 'NAME')
-    match('가사', 'NAME')
+if six.PY3:
+    def test_name_unicode():
+        match('β', 'NAME')
+        match('가사', 'NAME')
 
 
 def test_name_number():