https://github.com/python/cpython/commit/64d6bde38ffb97a6f7c7fee82ba98de81911f1b9
commit: 64d6bde38ffb97a6f7c7fee82ba98de81911f1b9
branch: 3.14
author: Stan Ulbrych <[email protected]>
committer: encukou <[email protected]>
date: 2025-11-27T13:17:39+01:00
summary:

[3.14] gh-98896: resource_tracker: use json&base64 to allow arbitrary shared 
memory names (GH-138473) (GH-141922)


Co-authored-by: Rani Pinchuk <[email protected]>

files:
A Misc/NEWS.d/next/Library/2025-09-03-20-18-39.gh-issue-98896.tjez89.rst
M Lib/multiprocessing/resource_tracker.py
M Lib/test/_test_multiprocessing.py

diff --git a/Lib/multiprocessing/resource_tracker.py 
b/Lib/multiprocessing/resource_tracker.py
index 38fcaed48fa9fb..b0f9099f4a59f3 100644
--- a/Lib/multiprocessing/resource_tracker.py
+++ b/Lib/multiprocessing/resource_tracker.py
@@ -15,6 +15,7 @@
 # this resource tracker process, "killall python" would probably leave unlinked
 # resources.
 
+import base64
 import os
 import signal
 import sys
@@ -22,6 +23,8 @@
 import warnings
 from collections import deque
 
+import json
+
 from . import spawn
 from . import util
 
@@ -196,6 +199,17 @@ def _launch(self):
         finally:
             os.close(r)
 
+    def _make_probe_message(self):
+        """Return a JSON-encoded probe message."""
+        return (
+            json.dumps(
+                {"cmd": "PROBE", "rtype": "noop"},
+                ensure_ascii=True,
+                separators=(",", ":"),
+            )
+            + "\n"
+        ).encode("ascii")
+
     def _ensure_running_and_write(self, msg=None):
         with self._lock:
             if self._lock._recursion_count() > 1:
@@ -207,7 +221,7 @@ def _ensure_running_and_write(self, msg=None):
             if self._fd is not None:
                 # resource tracker was launched before, is it still running?
                 if msg is None:
-                    to_send = b'PROBE:0:noop\n'
+                    to_send = self._make_probe_message()
                 else:
                     to_send = msg
                 try:
@@ -234,7 +248,7 @@ def _check_alive(self):
         try:
             # We cannot use send here as it calls ensure_running, creating
             # a cycle.
-            os.write(self._fd, b'PROBE:0:noop\n')
+            os.write(self._fd, self._make_probe_message())
         except OSError:
             return False
         else:
@@ -253,11 +267,25 @@ def _write(self, msg):
         assert nbytes == len(msg), f"{nbytes=} != {len(msg)=}"
 
     def _send(self, cmd, name, rtype):
-        msg = f"{cmd}:{name}:{rtype}\n".encode("ascii")
-        if len(msg) > 512:
-            # posix guarantees that writes to a pipe of less than PIPE_BUF
-            # bytes are atomic, and that PIPE_BUF >= 512
-            raise ValueError('msg too long')
+        # POSIX guarantees that writes to a pipe of at most PIPE_BUF bytes are atomic,
+        # and that PIPE_BUF is at least 512, so we keep the message within 512 bytes.
+        # POSIX shm_open() and sem_open() require the name, including its leading slash,
+        # to be at most NAME_MAX bytes (255 on Linux).
+        # With json.dumps(..., ensure_ascii=True) every non-ASCII byte becomes a 6-char
+        # escape like \uDC80.
+        # As we want the overall message to be kept atomic and therefore within 512 bytes,
+        # we encode the raw name bytes with URL-safe Base64 - so a 255-byte name
+        # will not exceed 340 bytes.
+        b = name.encode('utf-8', 'surrogateescape')
+        if len(b) > 255:
+            raise ValueError('shared memory name too long (max 255 bytes)')
+        b64 = base64.urlsafe_b64encode(b).decode('ascii')
+
+        payload = {"cmd": cmd, "rtype": rtype, "base64_name": b64}
+        msg = (json.dumps(payload, ensure_ascii=True, separators=(",", ":")) + 
"\n").encode("ascii")
+
+        # The entire JSON message is guaranteed to fit within 512 bytes (the POSIX minimum PIPE_BUF) by construction.
+        assert len(msg) <= 512, f"internal error: message too long ({len(msg)} 
bytes)"
 
         self._ensure_running_and_write(msg)
 
@@ -290,7 +318,23 @@ def main(fd):
         with open(fd, 'rb') as f:
             for line in f:
                 try:
-                    cmd, name, rtype = line.strip().decode('ascii').split(':')
+                    try:
+                        obj = json.loads(line.decode('ascii'))
+                    except Exception as e:
+                        raise ValueError("malformed resource_tracker message: 
%r" % (line,)) from e
+
+                    cmd = obj["cmd"]
+                    rtype = obj["rtype"]
+                    b64  = obj.get("base64_name", "")
+
+                    if not isinstance(cmd, str) or not isinstance(rtype, str) 
or not isinstance(b64, str):
+                        raise ValueError("malformed resource_tracker fields: 
%r" % (obj,))
+
+                    try:
+                        name = base64.urlsafe_b64decode(b64).decode('utf-8', 
'surrogateescape')
+                    except ValueError as e:
+                        raise ValueError("malformed resource_tracker 
base64_name: %r" % (b64,)) from e
+
                     cleanup_func = _CLEANUP_FUNCS.get(rtype, None)
                     if cleanup_func is None:
                         raise ValueError(
diff --git a/Lib/test/_test_multiprocessing.py 
b/Lib/test/_test_multiprocessing.py
index 03114f1701e0ce..d5cd25a7980468 100644
--- a/Lib/test/_test_multiprocessing.py
+++ b/Lib/test/_test_multiprocessing.py
@@ -7144,3 +7144,47 @@ class SemLock(_multiprocessing.SemLock):
         name = f'test_semlock_subclass-{os.getpid()}'
         s = SemLock(1, 0, 10, name, False)
         _multiprocessing.sem_unlink(name)
+
+
[email protected](HAS_SHMEM, "requires multiprocessing.shared_memory")
+class TestSharedMemoryNames(unittest.TestCase):
+    def 
test_that_shared_memory_name_with_colons_has_no_resource_tracker_errors(self):
+        # Test script that creates and cleans up shared memory with colons and newlines in the name
+        test_script = textwrap.dedent("""
+            import sys
+            from multiprocessing import shared_memory
+            import time
+
+            # Test various patterns of colons in names
+            test_names = [
+                "a:b",
+                "a:b:c",
+                "test:name:with:many:colons",
+                ":starts:with:colon",
+                "ends:with:colon:",
+                "::double::colons::",
+                "name\\nwithnewline",
+                "name-with-trailing-newline\\n",
+                "\\nname-starts-with-newline",
+                "colons:and\\nnewlines:mix",
+                "multi\\nline\\nname",
+            ]
+
+            for name in test_names:
+                try:
+                    shm = shared_memory.SharedMemory(create=True, size=100, 
name=name)
+                    shm.buf[:5] = b'hello'  # Write something to the shared 
memory
+                    shm.close()
+                    shm.unlink()
+
+                except Exception as e:
+                    print(f"Error with name '{name}': {e}", file=sys.stderr)
+                    sys.exit(1)
+
+            print("SUCCESS")
+        """)
+
+        rc, out, err = script_helper.assert_python_ok("-c", test_script)
+        self.assertIn(b"SUCCESS", out)
+        self.assertNotIn(b"traceback", err.lower(), err)
+        self.assertNotIn(b"resource_tracker.py", err, err)
diff --git 
a/Misc/NEWS.d/next/Library/2025-09-03-20-18-39.gh-issue-98896.tjez89.rst 
b/Misc/NEWS.d/next/Library/2025-09-03-20-18-39.gh-issue-98896.tjez89.rst
new file mode 100644
index 00000000000000..6831499c0afb43
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-03-20-18-39.gh-issue-98896.tjez89.rst
@@ -0,0 +1,2 @@
+Fix a failure in multiprocessing resource_tracker when SharedMemory names contain colons.
+Patch by Rani Pinchuk.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to