https://github.com/python/cpython/commit/fa9c3eefd475f0647a69bf3f49db8100848fb6a9
commit: fa9c3eefd475f0647a69bf3f49db8100848fb6a9
branch: main
author: Abhishek Tiwari <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2025-11-04T12:54:28+02:00
summary:
gh-140797: Forbid capturing groups in re.Scanner lexicon patterns (GH-140944)
files:
A Misc/NEWS.d/next/Library/2025-11-03-16-23-54.gh-issue-140797.DuFEeR.rst
M Lib/re/__init__.py
M Lib/test/test_re.py
diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py
index a5316391297f4c..ecec16e9005f3b 100644
--- a/Lib/re/__init__.py
+++ b/Lib/re/__init__.py
@@ -397,9 +397,12 @@ def __init__(self, lexicon, flags=0):
s = _parser.State()
s.flags = flags
for phrase, action in lexicon:
+ sub_pattern = _parser.parse(phrase, flags)
+ if sub_pattern.state.groups != 1:
+ raise ValueError("Cannot use capturing groups in re.Scanner")
gid = s.opengroup()
p.append(_parser.SubPattern(s, [
- (SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))),
+ (SUBPATTERN, (gid, 0, 0, sub_pattern)),
]))
s.closegroup(gid, p[-1])
p = _parser.SubPattern(s, [(BRANCH, (None, p))])
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 5fc95087f2b6ad..9f6f04bf6b8347 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1639,6 +1639,24 @@ def s_int(scanner, token): return int(token)
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
'op+', 'bar'], ''))
+ def test_bug_gh140797(self):
+ # gh140797: Capturing groups are not allowed in re.Scanner
+
+ msg = r"Cannot use capturing groups in re\.Scanner"
+ # Capturing group throws an error
+ with self.assertRaisesRegex(ValueError, msg):
+ Scanner([("(a)b", None)])
+
+ # Named Group
+ with self.assertRaisesRegex(ValueError, msg):
+ Scanner([("(?P<name>a)", None)])
+
+ # Non-capturing groups should pass normally
+ s = Scanner([("(?:a)b", lambda scanner, token: token)])
+ result, rem = s.scan("ab")
+ self.assertEqual(result,['ab'])
+ self.assertEqual(rem,'')
+
def test_bug_448951(self):
# bug 448951 (similar to 429357, but with single char match)
# (Also test greedy matches.)
diff --git a/Misc/NEWS.d/next/Library/2025-11-03-16-23-54.gh-issue-140797.DuFEeR.rst b/Misc/NEWS.d/next/Library/2025-11-03-16-23-54.gh-issue-140797.DuFEeR.rst
new file mode 100644
index 00000000000000..493b740261e64c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-03-16-23-54.gh-issue-140797.DuFEeR.rst
@@ -0,0 +1,2 @@
+The undocumented :class:`!re.Scanner` class now forbids regular expressions containing capturing groups in its lexicon patterns. Patterns using capturing groups could
+previously lead to crashes with segmentation fault. Use non-capturing groups (?:...) instead.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]