https://github.com/python/cpython/commit/0c01090ad957de4625f504ce4f29df0a05d09fba
commit: 0c01090ad957de4625f504ce4f29df0a05d09fba
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2025-10-21T15:22:15+01:00
summary:
GH-139951: Fix major GC performance regression (GH-140262)
* Count the number of actually tracked objects, instead of trackable objects. This
ensures that untracking tuples has the desired effect of reducing GC overhead.
* Do not track most untrackable tuples during creation. This prevents large
numbers of small tuples causing excessive GCs.
files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst
M Include/internal/pycore_gc.h
M Lib/test/test_gc.py
M Objects/tupleobject.c
M Python/gc.c
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index a6519aa086309d..fd284d0e4ecc2f 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -205,6 +205,12 @@ static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) {
#endif
}
+extern void _Py_ScheduleGC(PyThreadState *tstate);
+
+#ifndef Py_GIL_DISABLED
+extern void _Py_TriggerGC(struct _gc_runtime_state *gcstate);
+#endif
+
/* Tell the GC to track this object.
*
@@ -238,14 +244,19 @@ static inline void _PyObject_GC_TRACK(
"object is in generation which is garbage collected",
filename, lineno, __func__);
- PyInterpreterState *interp = _PyInterpreterState_GET();
- PyGC_Head *generation0 = &interp->gc.young.head;
+ struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+ PyGC_Head *generation0 = &gcstate->young.head;
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
_PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last);
- uintptr_t not_visited = 1 ^ interp->gc.visited_space;
+ uintptr_t not_visited = 1 ^ gcstate->visited_space;
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
generation0->_gc_prev = (uintptr_t)gc;
+ gcstate->young.count++; /* number of tracked GC objects */
+ gcstate->heap_size++;
+ if (gcstate->young.count > gcstate->young.threshold) {
+ _Py_TriggerGC(gcstate);
+ }
#endif
}
@@ -280,6 +291,11 @@ static inline void _PyObject_GC_UNTRACK(
_PyGCHead_SET_PREV(next, prev);
gc->_gc_next = 0;
gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
+ struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
+ if (gcstate->young.count > 0) {
+ gcstate->young.count--;
+ }
+ gcstate->heap_size--;
#endif
}
@@ -343,7 +359,6 @@ extern PyObject *_PyGC_GetReferrers(PyInterpreterState
*interp, PyObject *objs);
// Functions to clear types free lists
extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
-extern void _Py_ScheduleGC(PyThreadState *tstate);
extern void _Py_RunGC(PyThreadState *tstate);
union _PyStackRef;
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 08055bb8e6bd41..10c3a622107714 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1473,10 +1473,11 @@ def callback(ignored):
# The free-threaded build doesn't have multiple generations, so
# just trigger a GC manually.
gc.collect()
+ assert not detector.gc_happened
while not detector.gc_happened:
i += 1
- if i > 10000:
- self.fail("gc didn't happen after 10000 iterations")
+ if i > 100000:
+ self.fail("gc didn't happen after 100000 iterations")
self.assertEqual(len(ouch), 0)
junk.append([]) # this will eventually trigger gc
@@ -1548,8 +1549,8 @@ def __del__(self):
gc.collect()
while not detector.gc_happened:
i += 1
- if i > 10000:
- self.fail("gc didn't happen after 10000 iterations")
+ if i > 50000:
+ self.fail("gc didn't happen after 50000 iterations")
self.assertEqual(len(ouch), 0)
junk.append([]) # this will eventually trigger gc
@@ -1566,8 +1567,8 @@ def test_indirect_calls_with_gc_disabled(self):
detector = GC_Detector()
while not detector.gc_happened:
i += 1
- if i > 10000:
- self.fail("gc didn't happen after 10000 iterations")
+ if i > 100000:
+ self.fail("gc didn't happen after 100000 iterations")
junk.append([]) # this will eventually trigger gc
try:
@@ -1577,11 +1578,11 @@ def test_indirect_calls_with_gc_disabled(self):
detector = GC_Detector()
while not detector.gc_happened:
i += 1
- if i > 10000:
+ if i > 100000:
break
junk.append([]) # this may eventually trigger gc (if it is enabled)
- self.assertEqual(i, 10001)
+ self.assertEqual(i, 100001)
finally:
gc.enable()
diff --git
a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst
b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst
new file mode 100644
index 00000000000000..e03996188a7e22
--- /dev/null
+++
b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-17-18-03-12.gh-issue-139951.IdwM2O.rst
@@ -0,0 +1,7 @@
+Fixes a regression in GC performance for a growing heap composed mostly of
+small tuples.
+
+* Counts number of actually tracked objects, instead of trackable objects.
+ This ensures that untracking tuples has the desired effect of reducing GC overhead.
+* Does not track most untrackable tuples during creation.
+ This prevents large numbers of small tuples causing excessive GCs.
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index cd90b06d499faf..169ac69701da11 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -156,6 +156,18 @@ _PyTuple_MaybeUntrack(PyObject *op)
_PyObject_GC_UNTRACK(op);
}
+/* Fast but conservative check of whether an object may be tracked.
+ May return true for an object that is not tracked;
+ will always return true for an object that is tracked.
+ This is a temporary workaround until _PyObject_GC_IS_TRACKED
+ becomes fast and safe to call on non-GC objects.
+*/
+static bool
+maybe_tracked(PyObject *ob)
+{
+ return _PyType_IS_GC(Py_TYPE(ob));
+}
+
PyObject *
PyTuple_Pack(Py_ssize_t n, ...)
{
@@ -163,6 +175,7 @@ PyTuple_Pack(Py_ssize_t n, ...)
PyObject *o;
PyObject **items;
va_list vargs;
+ bool track = false;
if (n == 0) {
return tuple_get_empty();
@@ -177,10 +190,15 @@ PyTuple_Pack(Py_ssize_t n, ...)
items = result->ob_item;
for (i = 0; i < n; i++) {
o = va_arg(vargs, PyObject *);
+ if (!track && maybe_tracked(o)) {
+ track = true;
+ }
items[i] = Py_NewRef(o);
}
va_end(vargs);
- _PyObject_GC_TRACK(result);
+ if (track) {
+ _PyObject_GC_TRACK(result);
+ }
return (PyObject *)result;
}
@@ -377,11 +395,17 @@ PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
return NULL;
}
PyObject **dst = tuple->ob_item;
+ bool track = false;
for (Py_ssize_t i = 0; i < n; i++) {
PyObject *item = src[i];
+ if (!track && maybe_tracked(item)) {
+ track = true;
+ }
dst[i] = Py_NewRef(item);
}
- _PyObject_GC_TRACK(tuple);
+ if (track) {
+ _PyObject_GC_TRACK(tuple);
+ }
return (PyObject *)tuple;
}
@@ -396,10 +420,17 @@ _PyTuple_FromStackRefStealOnSuccess(const _PyStackRef
*src, Py_ssize_t n)
return NULL;
}
PyObject **dst = tuple->ob_item;
+ bool track = false;
for (Py_ssize_t i = 0; i < n; i++) {
- dst[i] = PyStackRef_AsPyObjectSteal(src[i]);
+ PyObject *item = PyStackRef_AsPyObjectSteal(src[i]);
+ if (!track && maybe_tracked(item)) {
+ track = true;
+ }
+ dst[i] = item;
+ }
+ if (track) {
+ _PyObject_GC_TRACK(tuple);
}
- _PyObject_GC_TRACK(tuple);
return (PyObject *)tuple;
}
diff --git a/Python/gc.c b/Python/gc.c
index fd724301c769bf..a1f3d86d91036b 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1639,7 +1639,7 @@ assess_work_to_do(GCState *gcstate)
scale_factor = 2;
}
intptr_t new_objects = gcstate->young.count;
- intptr_t max_heap_fraction = new_objects*3/2;
+ intptr_t max_heap_fraction = new_objects*2;
intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR /
scale_factor;
if (heap_fraction > max_heap_fraction) {
heap_fraction = max_heap_fraction;
@@ -1654,6 +1654,9 @@ gc_collect_increment(PyThreadState *tstate, struct
gc_collection_stats *stats)
GC_STAT_ADD(1, collections, 1);
GCState *gcstate = &tstate->interp->gc;
gcstate->work_to_do += assess_work_to_do(gcstate);
+ if (gcstate->work_to_do < 0) {
+ return;
+ }
untrack_tuples(&gcstate->young.head);
if (gcstate->phase == GC_PHASE_MARK) {
Py_ssize_t objects_marked = mark_at_start(tstate);
@@ -1696,7 +1699,6 @@ gc_collect_increment(PyThreadState *tstate, struct
gc_collection_stats *stats)
gc_collect_region(tstate, &increment, &survivors, stats);
gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment));
- gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR /
scale_factor;
gcstate->work_to_do -= increment_size;
add_stats(gcstate, 1, stats);
@@ -2299,21 +2301,11 @@ _Py_ScheduleGC(PyThreadState *tstate)
}
void
-_PyObject_GC_Link(PyObject *op)
+_Py_TriggerGC(struct _gc_runtime_state *gcstate)
{
- PyGC_Head *gc = AS_GC(op);
- // gc must be correctly aligned
- _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
-
PyThreadState *tstate = _PyThreadState_GET();
- GCState *gcstate = &tstate->interp->gc;
- gc->_gc_next = 0;
- gc->_gc_prev = 0;
- gcstate->young.count++; /* number of allocated GC objects */
- gcstate->heap_size++;
- if (gcstate->young.count > gcstate->young.threshold &&
- gcstate->enabled &&
- gcstate->young.threshold &&
+ if (gcstate->enabled &&
+ gcstate->young.threshold != 0 &&
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
!_PyErr_Occurred(tstate))
{
@@ -2321,6 +2313,17 @@ _PyObject_GC_Link(PyObject *op)
}
}
+void
+_PyObject_GC_Link(PyObject *op)
+{
+ PyGC_Head *gc = AS_GC(op);
+ // gc must be correctly aligned
+ _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
+ gc->_gc_next = 0;
+ gc->_gc_prev = 0;
+
+}
+
void
_Py_RunGC(PyThreadState *tstate)
{
@@ -2427,6 +2430,11 @@ PyObject_GC_Del(void *op)
PyGC_Head *g = AS_GC(op);
if (_PyObject_GC_IS_TRACKED(op)) {
gc_list_remove(g);
+ GCState *gcstate = get_gc_state();
+ if (gcstate->young.count > 0) {
+ gcstate->young.count--;
+ }
+ gcstate->heap_size--;
#ifdef Py_DEBUG
PyObject *exc = PyErr_GetRaisedException();
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
@@ -2440,11 +2448,6 @@ PyObject_GC_Del(void *op)
PyErr_SetRaisedException(exc);
#endif
}
- GCState *gcstate = get_gc_state();
- if (gcstate->young.count > 0) {
- gcstate->young.count--;
- }
- gcstate->heap_size--;
PyObject_Free(((char *)op)-presize);
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]