https://github.com/python/cpython/commit/bc9e63dd9d2931771415cca1b0ed774471d523c0
commit: bc9e63dd9d2931771415cca1b0ed774471d523c0
branch: main
author: Alper <[email protected]>
committer: colesbury <[email protected]>
date: 2025-11-26T15:40:45-05:00
summary:

gh-116738: Fix thread-safety issue in re module for free threading (gh-141923)

Added atomic operations to `scanner_begin()` and `scanner_end()` to prevent
race conditions on the `executing` flag in free-threaded builds. Also added
tests for concurrent usage of the `re` module.

Without the atomic operations, `test_scanner_concurrent_access()` triggers
`assert(self->executing)` failures, or a thread sanitizer run emits errors.

files:
A Lib/test/test_free_threading/test_re.py
A Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
M Include/internal/pycore_pyatomic_ft_wrappers.h
M Modules/_sre/sre.c

diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h
index c31c33657002ec..2ae0185226f847 100644
--- a/Include/internal/pycore_pyatomic_ft_wrappers.h
+++ b/Include/internal/pycore_pyatomic_ft_wrappers.h
@@ -77,6 +77,10 @@ extern "C" {
     _Py_atomic_store_ushort_relaxed(&value, new_value)
 #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \
     _Py_atomic_load_ushort_relaxed(&value)
+#define FT_ATOMIC_LOAD_INT(value) \
+    _Py_atomic_load_int(&value)
+#define FT_ATOMIC_STORE_INT(value, new_value) \
+    _Py_atomic_store_int(&value, new_value)
 #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \
     _Py_atomic_store_int_relaxed(&value, new_value)
 #define FT_ATOMIC_LOAD_INT_RELAXED(value) \
@@ -144,6 +148,8 @@ extern "C" {
 #define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value
 #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value
 #define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value
+#define FT_ATOMIC_LOAD_INT(value) value
+#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value
 #define FT_ATOMIC_LOAD_INT_RELAXED(value) value
 #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value
 #define FT_ATOMIC_LOAD_UINT_RELAXED(value) value
diff --git a/Lib/test/test_free_threading/test_re.py b/Lib/test/test_free_threading/test_re.py
new file mode 100644
index 00000000000000..56f25045d1bf8e
--- /dev/null
+++ b/Lib/test/test_free_threading/test_re.py
@@ -0,0 +1,62 @@
+import re
+import unittest
+
+from test.support import threading_helper
+from test.support.threading_helper import run_concurrently
+
+
+NTHREADS = 10
+
+
+@threading_helper.requires_working_threading()
+class TestRe(unittest.TestCase):
+    def test_pattern_sub(self):
+        """Pattern substitution should work across threads"""
+        pattern = re.compile(r"\w+@\w+\.\w+")
+        text = "e-mail: [email protected] or [email protected]. " * 5
+        results = []
+
+        def worker():
+            substituted = pattern.sub("(redacted)", text)
+            results.append(substituted.count("(redacted)"))
+
+        run_concurrently(worker_func=worker, nthreads=NTHREADS)
+        self.assertEqual(results, [2 * 5] * NTHREADS)
+
+    def test_pattern_search(self):
+        """Pattern search should work across threads."""
+        emails = ["[email protected]", "[email protected]"] * 10
+        pattern = re.compile(r"\w+@\w+\.\w+")
+        results = []
+
+        def worker():
+            matches = [pattern.search(e).group() for e in emails]
+            results.append(len(matches))
+
+        run_concurrently(worker_func=worker, nthreads=NTHREADS)
+        self.assertEqual(results, [2 * 10] * NTHREADS)
+
+    def test_scanner_concurrent_access(self):
+        """Shared scanner should reject concurrent access."""
+        pattern = re.compile(r"\w+")
+        scanner = pattern.scanner("word " * 10)
+
+        def worker():
+            for _ in range(100):
+                try:
+                    scanner.search()
+                except ValueError as e:
+                    if "already executing" in str(e):
+                        pass
+                    else:
+                        raise
+
+        run_concurrently(worker_func=worker, nthreads=NTHREADS)
+        # This test has no assertions. Its purpose is to catch crashes and
+        # enable thread sanitizer to detect race conditions. While "already
+        # executing" errors are very likely, they're not guaranteed due to
+        # non-deterministic thread scheduling, so we can't assert errors > 0.
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
new file mode 100644
index 00000000000000..151f8968292a61
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
@@ -0,0 +1,2 @@
+Fix thread safety issue with :mod:`re` scanner objects in free-threaded
+builds.
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 4e97101b699876..59ff9078e6cff4 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -2841,20 +2841,25 @@ scanner_dealloc(PyObject *self)
 static int
 scanner_begin(ScannerObject* self)
 {
-    if (self->executing) {
+#ifdef Py_GIL_DISABLED
+    int was_executing = _Py_atomic_exchange_int(&self->executing, 1);
+#else
+    int was_executing = self->executing;
+    self->executing = 1;
+#endif
+    if (was_executing) {
         PyErr_SetString(PyExc_ValueError,
                         "regular expression scanner already executing");
         return 0;
     }
-    self->executing = 1;
     return 1;
 }
 
 static void
 scanner_end(ScannerObject* self)
 {
-    assert(self->executing);
-    self->executing = 0;
+    assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing));
+    FT_ATOMIC_STORE_INT(self->executing, 0);
 }
 
 /*[clinic input]

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to