https://github.com/python/cpython/commit/d3d4cf943209d8f27084af621235aa382ba287b1
commit: d3d4cf943209d8f27084af621235aa382ba287b1
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-12-26T16:06:48Z
summary:
gh-140739: Fix crashes from corrupted remote memory (#143190)
files:
A Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst
M Modules/_remote_debugging/_remote_debugging.h
M Modules/_remote_debugging/asyncio.c
M Modules/_remote_debugging/code_objects.c
M Modules/_remote_debugging/frames.c
M Modules/_remote_debugging/module.c
M Modules/_remote_debugging/object_reading.c
diff --git
a/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst
b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst
new file mode 100644
index 00000000000000..ae93cbb51114f1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-26-14-51-50.gh-issue-140739.BAbZTo.rst
@@ -0,0 +1,2 @@
+Fix several crashes due to reading invalid memory in the new Tachyon
+sampling profiler. Patch by Pablo Galindo.
diff --git a/Modules/_remote_debugging/_remote_debugging.h
b/Modules/_remote_debugging/_remote_debugging.h
index 4119d916be7a63..78add74423b608 100644
--- a/Modules/_remote_debugging/_remote_debugging.h
+++ b/Modules/_remote_debugging/_remote_debugging.h
@@ -140,6 +140,11 @@ typedef enum _WIN32_THREADSTATE {
#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)
+/* Maximum sizes for validation to prevent buffer overflows from corrupted
data */
+#define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks
*/
+#define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */
+#define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set
iteration */
+
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
@@ -451,6 +456,7 @@ extern PyObject *make_frame_info(
extern bool parse_linetable(
const uintptr_t addrq,
const char* linetable,
+ Py_ssize_t linetable_size,
int firstlineno,
LocationInfo* info
);
diff --git a/Modules/_remote_debugging/asyncio.c
b/Modules/_remote_debugging/asyncio.c
index 7f91f16e3a2ce6..3fcc939fd0e876 100644
--- a/Modules/_remote_debugging/asyncio.c
+++ b/Modules/_remote_debugging/asyncio.c
@@ -112,9 +112,17 @@ iterate_set_entries(
}
Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object,
unwinder->debug_offsets.set_object.used);
- Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object,
unwinder->debug_offsets.set_object.mask) + 1;
+ Py_ssize_t mask = GET_MEMBER(Py_ssize_t, set_object,
unwinder->debug_offsets.set_object.mask);
uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object,
unwinder->debug_offsets.set_object.table);
+ // Validate mask and num_els to prevent huge loop iterations from garbage
data
+ if (mask < 0 || mask >= MAX_SET_TABLE_SIZE || num_els < 0 || num_els >
mask + 1) {
+ set_exception_cause(unwinder, PyExc_RuntimeError,
+ "Invalid set object (corrupted remote memory)");
+ return -1;
+ }
+ Py_ssize_t set_len = mask + 1;
+
Py_ssize_t i = 0;
Py_ssize_t els = 0;
while (i < set_len && els < num_els) {
@@ -812,14 +820,15 @@ append_awaited_by_for_thread(
return -1;
}
- if (GET_MEMBER(uintptr_t, task_node,
unwinder->debug_offsets.llist_node.next) == 0) {
+ uintptr_t next_node = GET_MEMBER(uintptr_t, task_node,
unwinder->debug_offsets.llist_node.next);
+ if (next_node == 0) {
PyErr_SetString(PyExc_RuntimeError,
"Invalid linked list structure reading remote
memory");
set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in
task linked list");
return -1;
}
- uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node,
unwinder->debug_offsets.llist_node.next)
+ uintptr_t task_addr = next_node
-
(uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node;
if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) {
@@ -830,7 +839,7 @@ append_awaited_by_for_thread(
// Read next node
if (_Py_RemoteDebug_PagedReadRemoteMemory(
&unwinder->handle,
- (uintptr_t)GET_MEMBER(uintptr_t, task_node,
unwinder->debug_offsets.llist_node.next),
+ next_node,
sizeof(task_node),
task_node) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read
next task node in awaited_by");
diff --git a/Modules/_remote_debugging/code_objects.c
b/Modules/_remote_debugging/code_objects.c
index ca6ffe7a00af60..9b7b4dc22b873b 100644
--- a/Modules/_remote_debugging/code_objects.c
+++ b/Modules/_remote_debugging/code_objects.c
@@ -123,44 +123,74 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder,
uintptr_t code_addr, uintptr_t
* LINE TABLE PARSING FUNCTIONS
*
============================================================================ */
+// Inline helper for bounds-checked byte reading (no function call overhead)
+static inline int
+read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out)
+{
+ if (*ptr >= end) {
+ return -1;
+ }
+ *out = *(*ptr)++;
+ return 0;
+}
+
static int
-scan_varint(const uint8_t **ptr)
+scan_varint(const uint8_t **ptr, const uint8_t *end)
{
- unsigned int read = **ptr;
- *ptr = *ptr + 1;
+ uint8_t read;
+ if (read_byte(ptr, end, &read) < 0) {
+ return -1;
+ }
unsigned int val = read & 63;
unsigned int shift = 0;
while (read & 64) {
- read = **ptr;
- *ptr = *ptr + 1;
+ if (read_byte(ptr, end, &read) < 0) {
+ return -1;
+ }
shift += 6;
+ // Prevent infinite loop on malformed data (shift overflow)
+ if (shift > 28) {
+ return -1;
+ }
val |= (read & 63) << shift;
}
- return val;
+ return (int)val;
}
static int
-scan_signed_varint(const uint8_t **ptr)
+scan_signed_varint(const uint8_t **ptr, const uint8_t *end)
{
- unsigned int uval = scan_varint(ptr);
+ int uval = scan_varint(ptr, end);
+ if (uval < 0) {
+ return INT_MIN; // Error sentinel (valid signed varints won't be
INT_MIN)
+ }
if (uval & 1) {
- return -(int)(uval >> 1);
+ return -(int)((unsigned int)uval >> 1);
}
else {
- return uval >> 1;
+ return (int)((unsigned int)uval >> 1);
}
}
bool
-parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno,
LocationInfo* info)
+parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t
linetable_size,
+ int firstlineno, LocationInfo* info)
{
+ // Reject garbage: zero or negative size
+ if (linetable_size <= 0) {
+ return false;
+ }
+
const uint8_t* ptr = (const uint8_t*)(linetable);
+ const uint8_t* end = ptr + linetable_size;
uintptr_t addr = 0;
int computed_line = firstlineno; // Running accumulator, separate from
output
+ int val; // Temporary for varint results
+ uint8_t byte; // Temporary for byte reads
const size_t MAX_LINETABLE_ENTRIES = 65536;
size_t entry_count = 0;
- while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
+ while (ptr < end && *ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
entry_count++;
uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15;
@@ -173,14 +203,34 @@ parse_linetable(const uintptr_t addrq, const char*
linetable, int firstlineno, L
info->column = info->end_column = -1;
break;
case PY_CODE_LOCATION_INFO_LONG:
- computed_line += scan_signed_varint(&ptr);
+ val = scan_signed_varint(&ptr, end);
+ if (val == INT_MIN) {
+ return false;
+ }
+ computed_line += val;
info->lineno = computed_line;
- info->end_lineno = computed_line + scan_varint(&ptr);
- info->column = scan_varint(&ptr) - 1;
- info->end_column = scan_varint(&ptr) - 1;
+ val = scan_varint(&ptr, end);
+ if (val < 0) {
+ return false;
+ }
+ info->end_lineno = computed_line + val;
+ val = scan_varint(&ptr, end);
+ if (val < 0) {
+ return false;
+ }
+ info->column = val - 1;
+ val = scan_varint(&ptr, end);
+ if (val < 0) {
+ return false;
+ }
+ info->end_column = val - 1;
break;
case PY_CODE_LOCATION_INFO_NO_COLUMNS:
- computed_line += scan_signed_varint(&ptr);
+ val = scan_signed_varint(&ptr, end);
+ if (val == INT_MIN) {
+ return false;
+ }
+ computed_line += val;
info->lineno = info->end_lineno = computed_line;
info->column = info->end_column = -1;
break;
@@ -189,17 +239,25 @@ parse_linetable(const uintptr_t addrq, const char*
linetable, int firstlineno, L
case PY_CODE_LOCATION_INFO_ONE_LINE2:
computed_line += code - 10;
info->lineno = info->end_lineno = computed_line;
- info->column = *(ptr++);
- info->end_column = *(ptr++);
+ if (read_byte(&ptr, end, &byte) < 0) {
+ return false;
+ }
+ info->column = byte;
+ if (read_byte(&ptr, end, &byte) < 0) {
+ return false;
+ }
+ info->end_column = byte;
break;
default: {
- uint8_t second_byte = *(ptr++);
- if ((second_byte & 128) != 0) {
+ if (read_byte(&ptr, end, &byte) < 0) {
+ return false;
+ }
+ if ((byte & 128) != 0) {
return false;
}
info->lineno = info->end_lineno = computed_line;
- info->column = code << 3 | (second_byte >> 4);
- info->end_column = info->column + (second_byte & 15);
+ info->column = code << 3 | (byte >> 4);
+ info->end_column = info->column + (byte & 15);
break;
}
}
@@ -384,8 +442,14 @@ parse_code_object(RemoteUnwinderObject *unwinder,
tlbc_entry = get_tlbc_cache_entry(unwinder, real_address,
unwinder->tlbc_generation);
}
- if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
- assert(ctx->tlbc_index >= 0);
+ // Validate tlbc_index and check TLBC cache
+ if (tlbc_entry) {
+ // Validate index bounds (also catches negative values since
tlbc_index is signed)
+ if (ctx->tlbc_index < 0 || ctx->tlbc_index >=
tlbc_entry->tlbc_array_size) {
+ set_exception_cause(unwinder, PyExc_RuntimeError,
+ "Invalid tlbc_index (corrupted remote memory)");
+ goto error;
+ }
assert(tlbc_entry->tlbc_array_size > 0);
// Use cached TLBC data
uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array +
sizeof(Py_ssize_t));
@@ -398,7 +462,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
}
}
- // Fall back to main bytecode
+ // Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0)
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
done_tlbc:
@@ -409,6 +473,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
; // Empty statement to avoid C23 extension warning
LocationInfo info = {0};
bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
+ PyBytes_GET_SIZE(meta->linetable),
meta->first_lineno, &info);
if (!ok) {
info.lineno = -1;
diff --git a/Modules/_remote_debugging/frames.c
b/Modules/_remote_debugging/frames.c
index d9fece6459875a..02c48205b85a37 100644
--- a/Modules/_remote_debugging/frames.c
+++ b/Modules/_remote_debugging/frames.c
@@ -45,6 +45,15 @@ process_single_stack_chunk(
// Check actual size and reread if necessary
size_t actual_size = GET_MEMBER(size_t, this_chunk,
offsetof(_PyStackChunk, size));
if (actual_size != current_size) {
+ // Validate size: reject garbage (too small or unreasonably large)
+ // Size must be at least enough for the header and reasonably bounded
+ if (actual_size <= offsetof(_PyStackChunk, data) || actual_size >
MAX_STACK_CHUNK_SIZE) {
+ PyMem_RawFree(this_chunk);
+ set_exception_cause(unwinder, PyExc_RuntimeError,
+ "Invalid stack chunk size (corrupted remote memory)");
+ return -1;
+ }
+
this_chunk = PyMem_RawRealloc(this_chunk, actual_size);
if (!this_chunk) {
PyErr_NoMemory();
@@ -129,7 +138,11 @@ void *
find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
{
for (size_t i = 0; i < chunks->count; ++i) {
- assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data));
+ // Validate size: reject garbage that would cause underflow
+ if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) {
+ // Skip this chunk - corrupted size from remote memory
+ continue;
+ }
uintptr_t base = chunks->chunks[i].remote_addr +
offsetof(_PyStackChunk, data);
size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk,
data);
diff --git a/Modules/_remote_debugging/module.c
b/Modules/_remote_debugging/module.c
index e14a23357fb400..737787a331f948 100644
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@@ -584,6 +584,7 @@
_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
}
while (current_tstate != 0) {
+ uintptr_t prev_tstate = current_tstate;
PyObject* frame_info = unwind_stack_for_thread(self,
¤t_tstate,
gil_holder_tstate,
gc_frame);
@@ -591,6 +592,14 @@
_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
// Check if this was an intentional skip due to mode-based
filtering
if ((self->mode == PROFILING_MODE_CPU || self->mode ==
PROFILING_MODE_GIL ||
self->mode == PROFILING_MODE_EXCEPTION) &&
!PyErr_Occurred()) {
+ // Detect cycle: if current_tstate didn't advance, we have
corrupted data
+ if (current_tstate == prev_tstate) {
+ Py_DECREF(interpreter_threads);
+ set_exception_cause(self, PyExc_RuntimeError,
+ "Thread list cycle detected (corrupted remote
memory)");
+ Py_CLEAR(result);
+ goto exit;
+ }
// Thread was skipped due to mode filtering, continue to
next thread
continue;
}
diff --git a/Modules/_remote_debugging/object_reading.c
b/Modules/_remote_debugging/object_reading.c
index 2f465ca0cac41d..447b7fd5926064 100644
--- a/Modules/_remote_debugging/object_reading.c
+++ b/Modules/_remote_debugging/object_reading.c
@@ -194,9 +194,21 @@ read_py_long(
return 0;
}
- // If the long object has inline digits, use them directly
+ // Validate size: reject garbage (negative or unreasonably large)
+ if (size < 0 || size > MAX_LONG_DIGITS) {
+ set_exception_cause(unwinder, PyExc_RuntimeError,
+ "Invalid PyLong size (corrupted remote memory)");
+ return -1;
+ }
+
+ // Calculate how many digits fit inline in our local buffer
+ Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit;
+ Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset;
+ Py_ssize_t max_inline_digits = inline_digits_space /
(Py_ssize_t)sizeof(digit);
+
+ // If the long object has inline digits that fit in our buffer, use them
directly
digit *digits;
- if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
+ if (size <= max_inline_digits && size <= _PY_NSMALLNEGINTS +
_PY_NSMALLPOSINTS) {
// For small integers, digits are inline in the long_value.ob_digit
array
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
if (!digits) {
@@ -204,7 +216,7 @@ read_py_long(
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to
allocate digits for small PyLong");
return -1;
}
- memcpy(digits, long_obj +
unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit));
+ memcpy(digits, long_obj + ob_digit_offset, size * sizeof(digit));
} else {
// For larger integers, we need to read the digits separately
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]