Serhiy Storchaka added the comment:
Well, here is a patch which makes the re module raise an exception on suspicious octal escapes (values greater than 0o377) instead of silently masking them to the low 8 bits.
----------
Added file:
http://bugs.python.org/file36602/re_octal_escape_overflow_raise.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue22362>
_______________________________________
diff -r 180f5bf7d1b9 Lib/sre_parse.py
--- a/Lib/sre_parse.py Thu Sep 11 14:33:02 2014 +0300
+++ b/Lib/sre_parse.py Thu Sep 11 23:31:31 2014 +0300
@@ -283,7 +283,11 @@ def _class_escape(source, escape):
elif c in OCTDIGITS:
# octal escape (up to three digits)
escape += source.getwhile(2, OCTDIGITS)
- return LITERAL, int(escape[1:], 8) & 0xff
+ c = int(escape[1:], 8)
+ if c > 0o377:
+ raise error('octal escape value %r outside of '
+ 'range 0-0o377' % escape)
+ return LITERAL, c
elif c in DIGITS:
raise ValueError
if len(escape) == 2:
@@ -325,7 +329,7 @@ def _escape(source, escape, state):
elif c == "0":
# octal escape
escape += source.getwhile(2, OCTDIGITS)
- return LITERAL, int(escape[1:], 8) & 0xff
+ return LITERAL, int(escape[1:], 8)
elif c in DIGITS:
# octal escape *or* decimal group reference (sigh)
if source.next in DIGITS:
@@ -334,7 +338,11 @@ def _escape(source, escape, state):
source.next in OCTDIGITS):
# got three octal digits; this is an octal escape
escape = escape + source.get()
- return LITERAL, int(escape[1:], 8) & 0xff
+ c = int(escape[1:], 8)
+ if c > 0o377:
+ raise error('octal escape value %r outside of '
+ 'range 0-0o377' % escape)
+ return LITERAL, c
# not an octal escape, so this is a group reference
group = int(escape[1:])
if group < state.groups:
@@ -825,7 +833,11 @@ def parse_template(source, pattern):
s.next in OCTDIGITS):
this += sget()
isoctal = True
- lappend(chr(int(this[1:], 8) & 0xff))
+ c = int(this[1:], 8)
+ if c > 0o377:
+ raise error('octal escape value %r outside of '
+ 'range 0-0o377' % this)
+ lappend(chr(c))
if not isoctal:
addgroup(int(this[1:]))
else:
diff -r 180f5bf7d1b9 Lib/test/test_re.py
--- a/Lib/test/test_re.py Thu Sep 11 14:33:02 2014 +0300
+++ b/Lib/test/test_re.py Thu Sep 11 23:31:31 2014 +0300
@@ -154,8 +154,8 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
- self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
- self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
+ self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
+ self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
@@ -691,7 +691,7 @@ class ReTests(unittest.TestCase):
self.assertIsNotNone(re.match(r"\08", "\0008"))
self.assertIsNotNone(re.match(r"\01", "\001"))
self.assertIsNotNone(re.match(r"\018", "\0018"))
- self.assertIsNotNone(re.match(r"\567", chr(0o167)))
+ self.assertRaises(re.error, re.match, r"\567", "")
self.assertRaises(re.error, re.match, r"\911", "")
self.assertRaises(re.error, re.match, r"\x1", "")
self.assertRaises(re.error, re.match, r"\x1z", "")
@@ -719,6 +719,7 @@ class ReTests(unittest.TestCase):
self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
+ self.assertRaises(re.error, re.match, r"[\567]", "")
self.assertIsNotNone(re.match(r"[\U0001d49c-\U0001d4b5]",
"\U0001d49e"))
self.assertRaises(re.error, re.match, r"[\911]", "")
self.assertRaises(re.error, re.match, r"[\x1z]", "")
@@ -740,7 +741,7 @@ class ReTests(unittest.TestCase):
self.assertIsNotNone(re.match(br"\08", b"\0008"))
self.assertIsNotNone(re.match(br"\01", b"\001"))
self.assertIsNotNone(re.match(br"\018", b"\0018"))
- self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
+ self.assertRaises(re.error, re.match, br"\567", b"")
self.assertRaises(re.error, re.match, br"\911", b"")
self.assertRaises(re.error, re.match, br"\x1", b"")
self.assertRaises(re.error, re.match, br"\x1z", b"")
@@ -755,6 +756,7 @@ class ReTests(unittest.TestCase):
self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(),
bytes([i])))
self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(),
bytes([i])))
self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(),
bytes([i])))
+ self.assertRaises(re.error, re.match, br"[\567]", b"")
self.assertIsNotNone(re.match(br"[\u]", b'u'))
self.assertIsNotNone(re.match(br"[\U]", b'U'))
self.assertRaises(re.error, re.match, br"[\911]", "")
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com