This is an automated email from the ASF dual-hosted git repository.

tlopex pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new a7463e9b2d [RPC][Tracker] Bound msg_size to MAX_TRACKER_MSG_BYTES to prevent unbounded buffer growth (#19586)
a7463e9b2d is described below

commit a7463e9b2da4bed070db2938caaed9ca030b49a6
Author: Bl4ckSku11 <[email protected]>
AuthorDate: Sat May 23 23:07:03 2026 -0500

    [RPC][Tracker] Bound msg_size to MAX_TRACKER_MSG_BYTES to prevent unbounded buffer growth (#19586)
    
    Fixes #<issue-number>.
    
    Reads of `_msg_size` from the tracker socket are now bounded to
    `MAX_TRACKER_MSG_BYTES = 1 MiB`, and the 4-byte size header is
    consumed at read time. Without these checks, a single TCP connection
    from a peer can grow the tracker process buffer until OOM, and a wire
    size of 0 stalls the parser: the zero-length header is never consumed,
    so it is re-parsed on every read and the buffered bytes are never freed.
    
    Per the TVM security model the tracker is deployed on trusted networks,
    so this is filed as a robustness defect, not a security advisory.
    Apache security team triage (private thread, 2026-05-17) confirmed this
    is the right channel.
    
    ### Test
    Added regression test in tests/python/contrib/test_rpc_tracker.py that
    completes the magic handshake, sends an oversized msg_size header
    (0x7FFFFFFF), and asserts the tracker closes the connection.
    
    ### Changes
    - python/tvm/rpc/tracker.py: bound `_msg_size` to (0,
    MAX_TRACKER_MSG_BYTES], consume size header on read, and close the
    connection when handling a message raises.
    - tests/python/contrib/test_rpc_tracker.py: regression test.
---
 python/tvm/rpc/tracker.py                | 31 ++++++++++++++++++----
 tests/python/contrib/test_rpc_tracker.py | 44 ++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/python/tvm/rpc/tracker.py b/python/tvm/rpc/tracker.py
index 81fe2feb69..0714c64fc9 100644
--- a/python/tvm/rpc/tracker.py
+++ b/python/tvm/rpc/tracker.py
@@ -77,6 +77,12 @@ logger.addHandler(console_handler)
 logger.setLevel(logging.INFO)
 logger.propagate = False
 
+# Maximum size in bytes for a single tracker message. Tracker frames carry
+# small JSON command tuples; 1 MiB is well above any legitimate payload and
+# bounds memory growth when a peer sends an oversized or malformed size
+# header on the wire.
+MAX_TRACKER_MSG_BYTES = 1 << 20
+
 
 class Scheduler:
     """Abstract interface of scheduler."""
@@ -224,14 +230,29 @@ class TCPEventHandler(tornado_util.TCPHandler):
             if self._msg_size == 0:
                 if len(self._data) >= 4:
                     self._msg_size = struct.unpack("<i", self._data[:4])[0]
+                    if self._msg_size <= 0 or self._msg_size > MAX_TRACKER_MSG_BYTES:
+                        logger.warning(
+                            "Invalid msg_size %d from %s; closing connection",
+                            self._msg_size,
+                            self.name(),
+                        )
+                        self.close()
+                        return
+                    del self._data[:4]
                 else:
                     return
-            if self._msg_size != 0 and len(self._data) >= self._msg_size + 4:
-                msg = py_str(bytes(self._data[4 : 4 + self._msg_size]))
-                del self._data[: 4 + self._msg_size]
+            if self._msg_size != 0 and len(self._data) >= self._msg_size:
+                msg = py_str(bytes(self._data[: self._msg_size]))
+                del self._data[: self._msg_size]
                 self._msg_size = 0
-                # pylint: disable=broad-except
-                self.call_handler(json.loads(msg))
+                try:
+                    self.call_handler(json.loads(msg))
+                except Exception:  # pylint: disable=broad-except
+                    logger.warning(
+                        "Error handling message from %s", self.name(), exc_info=True
+                    )
+                    self.close()
+                    return
             else:
                 return
 
diff --git a/tests/python/contrib/test_rpc_tracker.py b/tests/python/contrib/test_rpc_tracker.py
index 37db25982b..486d5abce4 100644
--- a/tests/python/contrib/test_rpc_tracker.py
+++ b/tests/python/contrib/test_rpc_tracker.py
@@ -105,6 +105,50 @@ def check_server_drop():
         print("Skip because tornado is not available")
 
 
+def check_tracker_rejects_oversized_msg_size():
+    """Tracker must reject an oversized msg_size header and close the connection
+    instead of buffering an unbounded amount of data on a single TCP connection.
+
+    Regression test for the unbounded buffer growth defect in
+    TCPEventHandler.on_message. See MAX_TRACKER_MSG_BYTES in tracker.py.
+    """
+    try:
+        # pylint: disable=import-outside-toplevel
+        import socket
+        import struct
+
+        from tvm.rpc import base, tracker
+
+        tserver = tracker.Tracker(port=9180, port_end=9290, silent=True)
+        try:
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            sock.settimeout(5)
+            sock.connect(("127.0.0.1", tserver.port))
+            # complete the 4-byte magic handshake
+            sock.sendall(struct.pack("<i", base.RPC_TRACKER_MAGIC))
+            magic_reply = sock.recv(4)
+            assert struct.unpack("<i", magic_reply)[0] == base.RPC_TRACKER_MAGIC
+
+            # send an oversized msg_size header (2 GiB)
+            sock.sendall(struct.pack("<i", 0x7FFFFFFF))
+
+            # server must close the connection (no payload buffering)
+            for _ in range(20):
+                chunk = sock.recv(4096)
+                if chunk == b"":
+                    break
+                time.sleep(0.05)
+            else:
+                raise AssertionError(
+                    "tracker did not close connection after oversized msg_size"
+                )
+        finally:
+            tserver.terminate()
+    except ImportError:
+        print("Skip because tornado is not available")
+
+
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
     check_server_drop()
+    check_tracker_rejects_oversized_msg_size()

Reply via email to