https://github.com/python/cpython/commit/5aaf41685834901e4ed0a40f4c055b92991a0bb5
commit: 5aaf41685834901e4ed0a40f4c055b92991a0bb5
branch: main
author: Mike Edmunds <[email protected]>
committer: bitdancer <[email protected]>
date: 2025-01-18T19:50:52-05:00
summary:
gh-80222: Fix email address header folding with long quoted-string (#122753)
Email generators using email.policy.default could incorrectly omit the
quote ('"') characters from a quoted-string during header refolding,
leading to invalid address headers and enabling header spoofing. This
change restores the quote characters on a bare-quoted-string as the
header is refolded, and escapes backslash and quote chars in the string.
files:
A Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
M Lib/email/_header_value_parser.py
M Lib/test/test_email/test__header_value_parser.py
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index ec2215a5e5f33c..3d845c09d415f6 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -95,8 +95,16 @@
NLSET = {'\n', '\r'}
SPECIALSNL = SPECIALS | NLSET
+
+def make_quoted_pairs(value):
+ """Escape dquote and backslash for use within a quoted-string."""
+ return str(value).replace('\\', '\\\\').replace('"', '\\"')
+
+
def quote_string(value):
- return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+ escaped = make_quoted_pairs(value)
+ return f'"{escaped}"'
+
# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
rfc2047_matcher = re.compile(r'''
@@ -2905,6 +2913,15 @@ def _refold_parse_tree(parse_tree, *, policy):
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
newparts = list(part)
+ if part.token_type == 'bare-quoted-string':
+ # To fold a quoted string we need to create a list of terminal
+ # tokens that will render the leading and trailing quotes
+ # and use quoted pairs in the value as appropriate.
+ newparts = (
+ [ValueTerminal('"', 'ptext')] +
+ [ValueTerminal(make_quoted_pairs(p), 'ptext')
+ for p in newparts] +
+ [ValueTerminal('"', 'ptext')])
if not part.as_ew_allowed:
wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 95224e19f67ce5..d60a7039f9d4c6 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -3082,13 +3082,40 @@ def test_address_list_with_list_separator_after_fold(self):
self._test(parser.get_address_list(to)[0],
f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <[email protected]>\n')
- a = '.' * 79
+ a = '.' * 79 # ('.' is a special, so must be in quoted-string.)
to = f'"{a}" <[email protected]>, "Hübsch Kaktus" <[email protected]>'
self._test(parser.get_address_list(to)[0],
- f'{a}\n'
+ f'"{a}"\n'
' <[email protected]>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
'<[email protected]>\n')
+ def test_address_list_with_specials_in_long_quoted_string(self):
+ # Regression for gh-80222.
+ policy = self.policy.clone(max_line_length=40)
+ cases = [
+ # (to, folded)
+ ('"Exfiltrator <[email protected]> (unclosed comment?" <[email protected]>',
+ '"Exfiltrator <[email protected]> (unclosed\n'
+ ' comment?" <[email protected]>\n'),
+ ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <[email protected]>',
+ '"Escaped \\" chars \\\\ in quoted-string\n'
+ ' stay escaped" <[email protected]>\n'),
+ ('This long display name does not need quotes <[email protected]>',
+ 'This long display name does not need\n'
+ ' quotes <[email protected]>\n'),
+ ('"Quotes are not required but are retained here" <[email protected]>',
+ '"Quotes are not required but are\n'
+ ' retained here" <[email protected]>\n'),
+ ('"A quoted-string, it can be a valid local-part"@example.com',
+ '"A quoted-string, it can be a valid\n'
+ ' local-part"@example.com\n'),
+ ('"[email protected]"@example.com',
+ '"[email protected]"@example.com\n'),
+ ]
+ for (to, folded) in cases:
+ with self.subTest(to=to):
+ self._test(parser.get_address_list(to)[0], folded, policy=policy)
+
# XXX Need tests with comments on various sides of a unicode token,
# and with unicode tokens in the comments. Spaces inside the quotes
# currently don't do the right thing.
diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
new file mode 100644
index 00000000000000..0f0661d0b1cf4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
@@ -0,0 +1,6 @@
+Fix bug in the folding of quoted strings when flattening an email message using
+a modern email policy. Previously when a quoted string was folded so that
+it spanned more than one line, the surrounding quotes and internal escapes
+would be omitted. This could theoretically be used to spoof header lines
+using a carefully constructed quoted string if the resulting rendered email
+was transmitted or re-parsed.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]