https://github.com/python/cpython/commit/1aa8bbe62f27b564cf15e2aad591c62744354a4e
commit: 1aa8bbe62f27b564cf15e2aad591c62744354a4e
branch: main
author: Ivan Savin <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2024-04-17T10:14:22Z
summary:

bpo-40944: Fix IndexError when parsing emails with truncated Message-ID, address, 
routes, etc. (GH-20790)

Co-authored-by: Serhiy Storchaka <[email protected]>

files:
A Misc/NEWS.d/next/Library/2020-06-10-19-24-17.bpo-40943.vjiiN_.rst
M Lib/email/_header_value_parser.py
M Lib/test/test_email/test__header_value_parser.py

diff --git a/Lib/email/_header_value_parser.py 
b/Lib/email/_header_value_parser.py
index 7755e629500a8f..d1b4c7df4f445f 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1213,7 +1213,7 @@ def get_bare_quoted_string(value):
     value is the text between the quote marks, with whitespace
     preserved and quoted pairs decoded.
     """
-    if value[0] != '"':
+    if not value or value[0] != '"':
         raise errors.HeaderParseError(
             "expected '\"' but found '{}'".format(value))
     bare_quoted_string = BareQuotedString()
@@ -1454,7 +1454,7 @@ def get_local_part(value):
     """
     local_part = LocalPart()
     leader = None
-    if value[0] in CFWS_LEADER:
+    if value and value[0] in CFWS_LEADER:
         leader, value = get_cfws(value)
     if not value:
         raise errors.HeaderParseError(
@@ -1613,7 +1613,7 @@ def get_domain(value):
     """
     domain = Domain()
     leader = None
-    if value[0] in CFWS_LEADER:
+    if value and value[0] in CFWS_LEADER:
         leader, value = get_cfws(value)
     if not value:
         raise errors.HeaderParseError(
@@ -1689,6 +1689,8 @@ def get_obs_route(value):
         if value[0] in CFWS_LEADER:
             token, value = get_cfws(value)
             obs_route.append(token)
+        if not value:
+            break
         if value[0] == '@':
             obs_route.append(RouteComponentMarker)
             token, value = get_domain(value[1:])
@@ -1707,7 +1709,7 @@ def get_angle_addr(value):
 
     """
     angle_addr = AngleAddr()
-    if value[0] in CFWS_LEADER:
+    if value and value[0] in CFWS_LEADER:
         token, value = get_cfws(value)
         angle_addr.append(token)
     if not value or value[0] != '<':
@@ -1717,7 +1719,7 @@ def get_angle_addr(value):
     value = value[1:]
     # Although it is not legal per RFC5322, SMTP uses '<>' in certain
     # circumstances.
-    if value[0] == '>':
+    if value and value[0] == '>':
         angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
         angle_addr.defects.append(errors.InvalidHeaderDefect(
             "null addr-spec in angle-addr"))
@@ -1769,6 +1771,9 @@ def get_name_addr(value):
     name_addr = NameAddr()
     # Both the optional display name and the angle-addr can start with cfws.
     leader = None
+    if not value:
+        raise errors.HeaderParseError(
+            "expected name-addr but found '{}'".format(value))
     if value[0] in CFWS_LEADER:
         leader, value = get_cfws(value)
         if not value:
diff --git a/Lib/test/test_email/test__header_value_parser.py 
b/Lib/test/test_email/test__header_value_parser.py
index 39ed4ee2a38159..56a1e3a3de5aa2 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -801,6 +801,10 @@ def test_get_quoted_string_header_ends_in_qcontent(self):
         self.assertEqual(qs.content, 'bob')
         self.assertEqual(qs.quoted_value, ' "bob"')
 
+    def test_get_quoted_string_cfws_only_raises(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_quoted_string(' (foo) ')
+
     def test_get_quoted_string_no_quoted_string(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_quoted_string(' (ab) xyz')
@@ -1135,6 +1139,10 @@ def test_get_local_part_complex_obsolete_invalid(self):
             '@python.org')
         self.assertEqual(local_part.local_part, 'Fred.A.Johnson and  dogs')
 
+    def test_get_local_part_empty_raises(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_local_part('')
+
     def test_get_local_part_no_part_raises(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_local_part(' (foo) ')
@@ -1387,6 +1395,10 @@ def test_get_domain_obsolete(self):
                                   '')
         self.assertEqual(domain.domain, 'example.com')
 
+    def test_get_domain_empty_raises(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_domain("")
+
     def test_get_domain_no_non_cfws_raises(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_domain("  (foo)\t")
@@ -1512,6 +1524,10 @@ def test_get_obs_route_no_route_before_end_raises(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_obs_route('(foo) @example.com,')
 
+    def test_get_obs_route_no_route_before_end_raises2(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_obs_route('(foo) @example.com, (foo) ')
+
     def test_get_obs_route_no_route_before_special_raises(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_obs_route('(foo) [abc],')
@@ -1520,6 +1536,14 @@ def 
test_get_obs_route_no_route_before_special_raises2(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_obs_route('(foo) @example.com [abc],')
 
+    def test_get_obs_route_no_domain_after_at_raises(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_obs_route('@')
+
+    def test_get_obs_route_no_domain_after_at_raises2(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_obs_route('@example.com, @')
+
     # get_angle_addr
 
     def test_get_angle_addr_simple(self):
@@ -1646,6 +1670,14 @@ def test_get_angle_addr_ends_at_special(self):
         self.assertIsNone(angle_addr.route)
         self.assertEqual(angle_addr.addr_spec, '[email protected]')
 
+    def test_get_angle_addr_empty_raise(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_angle_addr('')
+
+    def test_get_angle_addr_left_angle_only_raise(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_angle_addr('<')
+
     def test_get_angle_addr_no_angle_raise(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_angle_addr('(foo) ')
@@ -1857,6 +1889,10 @@ def test_get_name_addr_ends_at_special(self):
         self.assertIsNone(name_addr.route)
         self.assertEqual(name_addr.addr_spec, '[email protected]')
 
+    def test_get_name_addr_empty_raises(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_name_addr('')
+
     def test_get_name_addr_no_content_raises(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_name_addr(' (foo) ')
@@ -2732,6 +2768,10 @@ def test_get_msg_id_empty_id_right(self):
         with self.assertRaises(errors.HeaderParseError):
             parser.get_msg_id("<simplelocal@>")
 
+    def test_get_msg_id_no_id_right(self):
+        with self.assertRaises(errors.HeaderParseError):
+            parser.get_msg_id("<simplelocal@")
+
     def test_get_msg_id_with_brackets(self):
         # Microsof Outlook generates non-standard one-off addresses:
         # 
https://learn.microsoft.com/en-us/office/client-developer/outlook/mapi/one-off-addresses
diff --git a/Misc/NEWS.d/next/Library/2020-06-10-19-24-17.bpo-40943.vjiiN_.rst 
b/Misc/NEWS.d/next/Library/2020-06-10-19-24-17.bpo-40943.vjiiN_.rst
new file mode 100644
index 00000000000000..2018e857830d1e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-06-10-19-24-17.bpo-40943.vjiiN_.rst
@@ -0,0 +1 @@
+Fix several IndexError exceptions raised when parsing emails with truncated Message-ID, address, 
routes, etc., e.g. ``example@``.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to