Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,16 @@ def get_phrase(value):
else:
try:
token, value = get_word(value)
if (token[0].token_type == 'encoded-word'
and phrase
and phrase[-1].token_type == 'atom'
and len(phrase[-1]) > 1
and phrase[-1][-2].token_type == 'encoded-word'
and phrase[-1][-1].token_type == 'cfws'
and not phrase[-1][-1].comments
):
# linear ws between ews needs special handling...
phrase[-1][-1] = EWWhiteSpaceTerminal(phrase[-1], 'fws')
except errors.HeaderParseError:
if value[0] in CFWS_LEADER:
token, value = get_cfws(value)
Expand Down
88 changes: 88 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,78 @@ def get_phrase_cfws_only_raises(self):
with self.assertRaises(errors.HeaderParseError):
parser.get_phrase(' (foo) ')

def test_get_phrase_adjacent_ew(self):
    # RFC 2047 section 6.2: "'linear-white-space' that separates a pair
    # of adjacent 'encoded-word's is ignored", so the space/tab/space run
    # between the two encoded-words must not show up in the result.
    source = '=?ascii?q?Joi?= \t =?ascii?q?ned?='
    decoded = 'Joined'
    self._test_get_x(parser.get_phrase, source, decoded, decoded, [], '')

def test_get_phrase_adjacent_ew_different_encodings(self):
    # The adjacent encoded-words declare different charsets (utf-8 and
    # iso-8859-1); they are still expected to join into a single word.
    source = '=?utf-8?q?B=C3=A9r?= =?iso-8859-1?q?=E9nice?='
    decoded = 'Bérénice'
    self._test_get_x(parser.get_phrase, source, decoded, decoded, [], '')

def test_get_phrase_adjacent_ew_encoded_spaces(self):
    # Spaces that are encoded inside an encoded-word (the underscores in
    # the middle word) are expected to survive; only the literal
    # whitespace separating the encoded-words is dropped.
    source = (
        '=?ascii?q?Encoded?= =?ascii?q?_spaces_?= =?ascii?q?preserved?='
    )
    decoded = 'Encoded spaces preserved'
    self._test_get_x(parser.get_phrase, source, decoded, decoded, [], '')

def test_get_phrase_adjacent_ew_comment_is_not_linear_white_space(self):
    # A comment between two encoded-words is not linear-white-space, so
    # the words are expected to stay separate: the first expected text
    # keeps the comment, the second has a single space in its place.
    source = '=?ascii?q?Comment?= (is not) =?ascii?q?linear-white-space?='
    with_comment = 'Comment (is not) linear-white-space'
    without_comment = 'Comment linear-white-space'
    self._test_get_x(
        parser.get_phrase,
        source,
        with_comment,
        without_comment,
        [],
        '',
        comments=['is not'],
    )

def test_get_phrase_adjacent_ew_no_error_on_defects(self):
    # The second encoded-word contains raw whitespace, which is expected
    # to be reported as a defect; the two words must still be joined.
    source = '=?ascii?q?Def?= =?ascii?q?ect still joins?='
    decoded = 'Defect still joins'
    expected_defects = [errors.InvalidHeaderDefect]
    self._test_get_x(
        parser.get_phrase, source, decoded, decoded, expected_defects, '')

def test_get_phrase_adjacent_ew_ignore_non_ew(self):
    # '=?join?=' only looks like an encoded-word; the whitespace before
    # it is expected to be kept and the text passed through unchanged.
    source = '=?ascii?q?No?= =?join?= for non-ew'
    unjoined = 'No =?join?= for non-ew'
    self._test_get_x(parser.get_phrase, source, unjoined, unjoined, [], '')

def test_get_phrase_adjacent_ew_ignore_invalid_ew(self):
    # The second token is shaped like an encoded-word but is not a valid
    # one, so no joining is expected and the raw text is kept as is.
    source = '=?ascii?q?No?= =?ascii?rot13?wbva= for invalid ew'
    unjoined = 'No =?ascii?rot13?wbva= for invalid ew'
    self._test_get_x(parser.get_phrase, source, unjoined, unjoined, [], '')

def test_get_phrase_adjacent_ew_missing_space(self):
    # Two encoded-words with no whitespace between them at all: a defect
    # is expected for the missing trailing whitespace, and the words are
    # still expected to join.
    source = '=?ascii?q?Joi?==?ascii?q?ned?='
    decoded = 'Joined'
    expected_defects = [errors.InvalidHeaderDefect]
    self._test_get_x(
        parser.get_phrase, source, decoded, decoded, expected_defects, '')

# get_local_part

def test_get_local_part_simple(self):
Expand Down Expand Up @@ -2387,6 +2459,22 @@ def test_get_address_rfc2047_display_name(self):
self.assertEqual(address[0].token_type,
'mailbox')

def test_get_address_rfc2047_display_name_adjacent_ews(self):
    # A display name split across two adjacent encoded-words is expected
    # to be joined without the separating space when parsing an address.
    source = '=?utf-8?q?B=C3=A9r?= =?utf-8?q?=C3=A9nice?= <foo@example.com>'
    rendered = 'Bérénice <foo@example.com>'
    parsed = self._test_get_x(
        parser.get_address, source, rendered, rendered, [], '')

    # The result is a single-mailbox address carrying the joined name.
    self.assertEqual(parsed.token_type, 'address')
    self.assertEqual(len(parsed.mailboxes), 1)
    self.assertEqual(parsed.mailboxes, parsed.all_mailboxes)
    self.assertEqual(parsed.mailboxes[0].display_name, 'Bérénice')
    self.assertEqual(parsed[0].token_type, 'mailbox')

def test_get_address_empty_group(self):
address = self._test_get_x(parser.get_address,
'Monty Python:;',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fix bug in the parsing of :mod:`email` address headers that could result in
extraneous spaces in the decoded text when using a modern email policy.
Space between pairs of adjacent :rfc:`2047` encoded-words is now ignored, per
section 6.2 (and consistent with existing parsing of unstructured
headers like *Subject*).
Loading