From 6bcac2da733da9c00128c5bd134695bb6cc5557b Mon Sep 17 00:00:00 2001
From: Mikhail Krivushin
Date: Thu, 17 May 2018 12:24:03 +0400
Subject: [PATCH] fix regexp to match rfc 6532 addresses

---
 tests/test_base.py   | 3 +++
 trafaret/internet.py | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/test_base.py b/tests/test_base.py
index bb3589a..e2da1f2 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -265,6 +265,9 @@ def test_email(self):
         res = extract_error(t.Email, 123)
         self.assertEqual(res, 'value is not a string')
 
+    def test_with_cyrillic_C_in_contact(self):
+        res = t.Email.check(u'contaсt@kmgaszbut.104.ua')
+        self.assertEqual(res, u'contaсt@kmgaszbut.104.ua')
 
 class TestEnumTrafaret(unittest.TestCase):
     def test_enum(self):
diff --git a/trafaret/internet.py b/trafaret/internet.py
index ea36680..52d57ac 100644
--- a/trafaret/internet.py
+++ b/trafaret/internet.py
@@ -13,17 +13,19 @@
 
 MAX_EMAIL_LEN = 254
 
+ATEXT = r"-!#$%&'*+/=?^_`{}|~0-9A-Z"
+ATEXT_UTF8 = ATEXT + u"\u0080-\U0010FFFF"
 
 EMAIL_REGEXP = re.compile(
     # dot-atom
-    r"(?P<name>^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*"
+    r"(?P<name>^[" + ATEXT_UTF8 + "]+(\.[" + ATEXT_UTF8 + "]+)*"
     # quoted-string
     r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-011\013\014\016-\177])*"'
     # domain
     r')@(?P<domain>(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)$)'
     # literal form, ipv4 address (SMTP 4.1.3)
     r'|\[(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\]$',
-    re.IGNORECASE,
+    re.IGNORECASE | re.UNICODE,
 )
 
 